From: Chunseok Lee Date: Wed, 7 Sep 2022 10:04:21 +0000 (+0900) Subject: Imported Upstream version 1.21.0 X-Git-Tag: tizen_7.0_m2_release X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Ftags%2Ftizen_7.0_m2_release;p=platform%2Fcore%2Fml%2Fnnfw.git Imported Upstream version 1.21.0 --- diff --git a/.ahub/sam/exclude.txt b/.ahub/sam/exclude.txt index c9ba5e0..f16f84f 100644 --- a/.ahub/sam/exclude.txt +++ b/.ahub/sam/exclude.txt @@ -5,6 +5,22 @@ # Eigen /ONE/compiler/nnc/backends/soft_backend/code_snippets/eigen.def +# Frontend test tools that are needed for release package build +/ONE/compiler/circlechef +/ONE/compiler/circle-verify +/ONE/compiler/luci/tester + +# Exclude IR headers which have lots of similar patterns +# TODO remove this when refactoring is possible +/ONE/compiler/luci/lang/include/luci/IR/Nodes +/ONE/compiler/luci/import/include/luci/Import/Nodes +/ONE/compiler/loco/include/loco/IR +/ONE/compiler/tflchef/tflite/src/Op/include + +# Exclude interpreter kernels which have similar patterns +/ONE/compiler/luci-interpreter/src/kernels +/ONE/compiler/locomotiv/src/Node + # Test codes /ONE/tests diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml index 95e11d0..73ec548 100644 --- a/.ahub/tcchecker-tca/config.yaml +++ b/.ahub/tcchecker-tca/config.yaml @@ -4,30 +4,23 @@ test: testCaseLanguage: CPP testFW: GTEST testCaseFolder: - - /compute/test/cker - - /runtime/onert/core/src/backend/basic - - /runtime/onert/frontend/nnapi - - /runtime/onert/test/core/compiler - - /runtime/onert/test/core/exec - - /runtime/onert/test/core/interp - - /runtime/onert/test/graph - - /runtime/onert/test/graph/operand - - /runtime/onert/test/graph/operation - - /runtime/onert/test/graph/verifier - - /runtime/onert/test/ir - - /runtime/onert/test/util - - /tests/nnfw_api/src + - /compute/cker + - /runtime/libs/misc + - /runtime/libs/ndarray + - /runtime/onert + - /tests/nnfw_api testFile: - - extension: cpp + - extension: test.cpp any: 
true - - extension: cc + - extension: test.cc any: true testCase: - condition: - functionName: starts: - TEST + - TYPED_TEST - excludes : - Verifier.dag_checker - graph_operand_LayoutSet.layout_set_operators diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml index 38c76dc..a3f4c1c 100644 --- a/.github/workflows/check-pr-commit.yml +++ b/.github/workflows/check-pr-commit.yml @@ -5,6 +5,11 @@ on: branches: - master - release/* + types: + - opened + - synchronize + - reopened + - ready_for_review defaults: run: @@ -14,6 +19,8 @@ jobs: check-commit-message: name: Check commit message runs-on: ubuntu-20.04 + # Skip on draft, check on draft -> ready + if: github.event.pull_request.draft == false steps: - name: Checkout diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h index 1703e42..43f99dc 100644 --- a/compiler/arser/include/arser/arser.h +++ b/compiler/arser/include/arser/arser.h @@ -303,7 +303,7 @@ private: std::string _long_name; std::string _short_name; std::vector _names; - std::string _type; + std::string _type = "string"; std::string _help_message; std::function _func; uint32_t _nargs{1}; @@ -540,16 +540,20 @@ public: /* ** print usage */ + auto print_usage_arg = [&](const arser::Argument &arg) { + stream << " "; + std::string arg_name = arser::internal::remove_dash(arg._long_name); + std::for_each(arg_name.begin(), arg_name.end(), + [&stream](const char &c) { stream << static_cast(::toupper(c)); }); + }; stream << "Usage: ./" << parser._program_name << " "; // required optional argument for (const auto &arg : parser._optional_arg_vec) { if (!arg._is_required) continue; - stream << arg._short_name << " "; - std::string arg_name = arser::internal::remove_dash(arg._long_name); - std::for_each(arg_name.begin(), arg_name.end(), - [&stream](const char &c) { stream << static_cast(::toupper(c)); }); + stream << arg._short_name; + print_usage_arg(arg); stream << " "; } // rest of the optional 
argument @@ -560,10 +564,7 @@ public: stream << "[" << arg._short_name; if (arg._nargs) { - stream << " "; - std::string arg_name = arser::internal::remove_dash(arg._long_name); - std::for_each(arg_name.begin(), arg_name.end(), - [&stream](const char &c) { stream << static_cast(::toupper(c)); }); + print_usage_arg(arg); } stream << "]" << " "; @@ -591,39 +592,28 @@ public: } const size_t message_width = 60; - // positional argument - if (!parser._positional_arg_vec.empty()) - { - stream << "[Positional argument]" << std::endl; - for (const auto &arg : parser._positional_arg_vec) + auto print_help_args = [&](const std::list &args, const std::string &title) { + if (!args.empty()) { - stream.width(length_of_longest_arg); - stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; - for (size_t i = 0; i < arg._help_message.length(); i += message_width) + stream << title << std::endl; + for (const auto &arg : args) { - if (i) - stream << std::string(length_of_longest_arg, ' ') << "\t"; - stream << arg._help_message.substr(i, message_width) << std::endl; + stream.width(length_of_longest_arg); + stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; + for (size_t i = 0; i < arg._help_message.length(); i += message_width) + { + if (i) + stream << std::string(length_of_longest_arg, ' ') << "\t"; + stream << arg._help_message.substr(i, message_width) << std::endl; + } } + std::cout << std::endl; } - std::cout << std::endl; - } + }; + // positional argument + print_help_args(parser._positional_arg_vec, "[Positional argument]"); // optional argument - if (!parser._optional_arg_vec.empty()) - { - stream << "[Optional argument]" << std::endl; - for (const auto &arg : parser._optional_arg_vec) - { - stream.width(length_of_longest_arg); - stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; - for (size_t i = 0; i < arg._help_message.length(); i += message_width) - { - if (i) - stream << 
std::string(length_of_longest_arg, ' ') << "\t"; - stream << arg._help_message.substr(i, message_width) << std::endl; - } - } - } + print_help_args(parser._optional_arg_vec, "[Optional argument]"); return stream; } @@ -737,6 +727,29 @@ template T Arser::get(const std::string &arg_name) return get_impl(arg_name, static_cast(nullptr)); } +class Helper +{ +public: + static void add_version(Arser &arser, const std::function &func) + { + arser.add_argument("--version") + .nargs(0) + .required(false) + .default_value(false) + .help("Show version information and exit") + .exit_with(func); + } + + static void add_verbose(Arser &arser) + { + arser.add_argument("-V", "--verbose") + .nargs(0) + .required(false) + .default_value(false) + .help("output additional information to stdout or stderr"); + } +}; + } // namespace arser #endif // __ARSER_H__ diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt index 4d86f80..d5a6230 100644 --- a/compiler/circle-eval-diff/CMakeLists.txt +++ b/compiler/circle-eval-diff/CMakeLists.txt @@ -6,6 +6,7 @@ list(REMOVE_ITEM SOURCES ${TESTS}) add_executable(circle-eval-diff ${DRIVER} ${SOURCES}) target_include_directories(circle-eval-diff PRIVATE include) +target_include_directories(circle-eval-diff PRIVATE src) target_link_libraries(circle-eval-diff arser) target_link_libraries(circle-eval-diff safemain) @@ -17,6 +18,8 @@ target_link_libraries(circle-eval-diff luci_interpreter) target_link_libraries(circle-eval-diff dio_hdf5) target_link_libraries(circle-eval-diff vconone) +install(TARGETS circle-eval-diff DESTINATION bin) + if(NOT ENABLE_TEST) return() endif(NOT ENABLE_TEST) @@ -25,10 +28,15 @@ endif(NOT ENABLE_TEST) # Instead, we use TEST_SOURCES to specify sources uesd for tests. 
set(TEST_SOURCES "src/MetricPrinter.cpp" - "src/Tensor.cpp") + "src/Tensor.cpp" + "src/InputDataLoader.cpp") nnas_find_package(GTest REQUIRED) GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES}) +target_include_directories(circle_eval_diff_test PRIVATE include) target_include_directories(circle_eval_diff_test PRIVATE src) target_link_libraries(circle_eval_diff_test luci_testhelper) target_link_libraries(circle_eval_diff_test nncc_coverage) +target_link_libraries(circle_eval_diff_test dio_hdf5) +target_link_libraries(circle_eval_diff_test loco) +target_link_libraries(circle_eval_diff_test luci_lang) diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp index f4a12a4..7e63ec8 100644 --- a/compiler/circle-eval-diff/driver/Driver.cpp +++ b/compiler/circle-eval-diff/driver/Driver.cpp @@ -30,19 +30,15 @@ std::string to_lower_case(std::string s) return s; } -Metric to_metric(const std::string &str) -{ - if (to_lower_case(str).compare("mae") == 0) - return Metric::MAE; - - throw std::runtime_error("Unsupported metric."); -} - InputFormat to_input_format(const std::string &str) { - if (to_lower_case(str).compare("h5") == 0) + auto small_str = to_lower_case(str); + if (small_str.compare("h5") == 0) return InputFormat::H5; + if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0) + return InputFormat::DIR; + throw std::runtime_error("Unsupported input format."); } @@ -58,50 +54,50 @@ int entry(const int argc, char **argv) { arser::Arser arser("Compare inference results of two circle models"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); + arser::Helper::add_version(arser, print_version); - arser.add_argument("--first_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("First input model filepath"); + 
arser.add_argument("--first_model").required(true).help("First input model filepath"); - arser.add_argument("--second_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Second input model filepath"); + arser.add_argument("--second_model").required(true).help("Second input model filepath"); arser.add_argument("--first_input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath for the first model. If not given, circle-eval-diff will run with " "randomly generated data"); arser.add_argument("--second_input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath for the second model. If not given, circle-eval-diff will run with " "randomly generated data"); - arser.add_argument("--metric") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .default_value("MAE") - .help("Metric for comparison (default: MAE)"); + arser.add_argument("--dump_output_with_prefix") + .help("Dump output to files. should be given as an argument. " + "Outputs are saved in ..first.output and " + "..second.output."); + + arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error"); + + arser.add_argument("--print_mape") + .nargs(0) + .default_value(false) + .help("Print Mean Absolute PercentageError"); + + arser.add_argument("--print_mpeir") + .nargs(0) + .default_value(false) + .help("Print Mean Peak Error to Interval Ratio"); + + arser.add_argument("--print_top1_match") + .nargs(0) + .default_value(false) + .help("Print Mean Top-1 Match Ratio"); + + arser.add_argument("--print_top5_match") + .nargs(0) + .default_value(false) + .help("Print Mean Top-5 Match Ratio"); + + arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error"); arser.add_argument("--input_data_format") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("h5") .help("Input data format. 
h5/hdf5 (default) or directory"); @@ -124,6 +120,7 @@ int entry(const int argc, char **argv) std::string second_input_data_path; std::string metric; std::string input_data_format; + std::string output_prefix; if (arser["--first_input_data"]) first_input_data_path = arser.get("--first_input_data"); @@ -135,22 +132,54 @@ int entry(const int argc, char **argv) throw std::runtime_error("Input data path should be given for both first_model and " "second_model, or neither must be given."); - metric = arser.get("--metric"); + if (arser["--dump_output_with_prefix"]) + output_prefix = arser.get("--dump_output_with_prefix"); + + // Set Metrics + std::vector metrics; + if (arser["--print_mae"] and arser.get("--print_mae")) + { + metrics.emplace_back(Metric::MAE); + } + if (arser["--print_mape"] and arser.get("--print_mape")) + { + metrics.emplace_back(Metric::MAPE); + } + if (arser["--print_mpeir"] and arser.get("--print_mpeir")) + { + metrics.emplace_back(Metric::MPEIR); + } + if (arser["--print_top1_match"] and arser.get("--print_top1_match")) + { + metrics.emplace_back(Metric::MTOP1); + } + if (arser["--print_top5_match"] and arser.get("--print_top5_match")) + { + metrics.emplace_back(Metric::MTOP5); + } + if (arser["--print_mse"] and arser.get("--print_mse")) + { + metrics.emplace_back(Metric::MSE); + } + input_data_format = arser.get("--input_data_format"); auto ctx = std::make_unique(); { ctx->first_model_path = first_model_path; ctx->second_model_path = second_model_path; - ctx->metric = to_metric(metric); + ctx->first_input_data_path = first_input_data_path; + ctx->second_input_data_path = second_input_data_path; + ctx->metric = metrics; ctx->input_format = to_input_format(input_data_format); + ctx->output_prefix = output_prefix; } CircleEvalDiff ced(std::move(ctx)); ced.init(); - ced.evalDiff(first_input_data_path, second_input_data_path); + ced.evalDiff(); return EXIT_SUCCESS; } diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h 
b/compiler/circle-eval-diff/include/CircleEvalDiff.h index bf6aff4..7894480 100644 --- a/compiler/circle-eval-diff/include/CircleEvalDiff.h +++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h @@ -20,8 +20,12 @@ #include #include +#include "InputDataLoader.h" +#include "MetricPrinter.h" + #include #include +#include namespace circle_eval_diff { @@ -32,14 +36,12 @@ class ModuleEvalDiff; enum class Metric { Undefined, // For debugging - MAE, -}; - -enum class InputFormat -{ - Undefined, // For debugging - H5, - // TODO Implement Random, Directory + MAE, // Mean Absolute Error + MAPE, // Mean Percentage Absolute Error + MPEIR, // Mean Peak Error to Interval Ratio + MTOP1, // Mean Top-1 Match Ratio + MTOP5, // Mean Top-5 Match Ratio + MSE, // Mean Squared Error }; class CircleEvalDiff final @@ -49,8 +51,11 @@ public: { std::string first_model_path; std::string second_model_path; - Metric metric = Metric::Undefined; + std::string first_input_data_path; + std::string second_input_data_path; + std::vector metric; InputFormat input_format = InputFormat::Undefined; + std::string output_prefix; }; public: @@ -61,12 +66,13 @@ public: void init(); // Evaluate two circle models for the given input data and compare the results - void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const; + void evalDiff(void) const; private: std::unique_ptr _ctx; - std::unique_ptr _runner; + std::unique_ptr _first_module; + std::unique_ptr _second_module; + std::vector> _metrics; }; } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp index c39a113..43e026b 100644 --- a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp +++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp @@ -15,8 +15,9 @@ */ #include "CircleEvalDiff.h" -#include "ModuleEvalDiff.h" +#include "InputDataLoader.h" #include "MetricPrinter.h" +#include "Tensor.h" #include #include @@ -26,6 
+27,25 @@ namespace { +bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b) +{ + if (a->rank() != b->rank()) + return false; + + for (uint32_t i = 0; i < a->rank(); i++) + { + if (not(a->dim(i) == b->dim(i))) + return false; + } + + return true; +} + +bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b) +{ + return a->dtype() == b->dtype(); +} + std::unique_ptr import(const std::string &model_path) { // Load model from the file @@ -40,7 +60,12 @@ std::unique_ptr import(const std::string &model_path) throw std::runtime_error("Failed to verify circle '" + model_path + "'"); } - auto module = luci::Importer().importModule(circle::GetModel(model_data.data())); + auto circle_model = circle::GetModel(model_data.data()); + + if (not circle_model) + throw std::runtime_error("Failed to load '" + model_path + "'"); + + auto module = luci::Importer().importModule(circle_model); if (not module) throw std::runtime_error("Failed to load '" + model_path + "'"); @@ -48,50 +73,192 @@ std::unique_ptr import(const std::string &model_path) return module; } +const std::vector inputs_of(const luci::Module *module) +{ + return loco::input_nodes(module->graph()); +} + +const std::vector outputs_of(const luci::Module *module) +{ + return loco::output_nodes(module->graph()); +} + +void writeDataToFile(const std::string &filename, const char *data, size_t data_size) +{ + std::ofstream fs(filename, std::ofstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.write(data, data_size).fail()) + { + throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n"); + } +} + +void checkOutputs(const luci::Module *first, const luci::Module *second) +{ + const auto first_output = outputs_of(first); + const auto second_output = outputs_of(second); + + if (first_output.size() != second_output.size()) + throw std::runtime_error("Models have different output counts"); + + for (uint32_t i = 0; i < 
first_output.size(); i++) + { + const auto first_node = loco::must_cast(first_output[i]); + const auto second_node = loco::must_cast(second_output[i]); + + if (not same_shape(first_node, second_node)) + throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " + + second_node->name() + ")"); + + if (not same_dtype(first_node, second_node)) + throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " + + second_node->name() + ")"); + } +} + } // namespace namespace circle_eval_diff { -CircleEvalDiff::CircleEvalDiff(std::unique_ptr &&ctx) - : _ctx(std::move(ctx)), _runner(nullptr) +std::vector> interpret(const luci::Module *module, + const InputDataLoader::Data &data) +{ + auto interpreter = std::make_unique(module); + + auto input_nodes = ::inputs_of(module); + auto output_nodes = ::outputs_of(module); + + for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++) + { + auto input_node = loco::must_cast(input_nodes[input_idx]); + assert(input_node->index() == input_idx); + + auto input_data = data.at(input_idx); + interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size()); + } + + interpreter->interpret(); + + std::vector> outputs; + for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++) + { + auto output_node = loco::must_cast(output_nodes[output_idx]); + assert(output_node->index() == output_idx); + + auto tensor = createEmptyTensor(output_node); + interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size()); + outputs.emplace_back(tensor); + } + + return outputs; +} + +CircleEvalDiff::CircleEvalDiff(std::unique_ptr &&ctx) : _ctx(std::move(ctx)) { + // DO NOTHING } CircleEvalDiff::~CircleEvalDiff() = default; void CircleEvalDiff::init() { + _first_module = import(_ctx->first_model_path); + _second_module = import(_ctx->second_model_path); + + // Check modules have the same output signature (dtype/shape) + // Exception will be thrown if they have 
different signature + checkOutputs(_first_module.get(), _second_module.get()); + // Set metric std::unique_ptr metric; - switch (_ctx->metric) + for (auto metric : _ctx->metric) { - case Metric::MAE: - metric = std::make_unique(); - break; - default: - throw std::runtime_error("Unsupported metric."); + switch (metric) + { + case Metric::MAE: + { + _metrics.emplace_back(std::make_unique()); + break; + } + case Metric::MAPE: + { + _metrics.emplace_back(std::make_unique()); + break; + } + case Metric::MPEIR: + { + _metrics.emplace_back(std::make_unique()); + break; + } + case Metric::MTOP1: + { + _metrics.emplace_back(std::make_unique(1)); + break; + } + case Metric::MTOP5: + { + _metrics.emplace_back(std::make_unique(5)); + break; + } + case Metric::MSE: + { + _metrics.emplace_back(std::make_unique()); + break; + } + default: + throw std::runtime_error("Unsupported metric."); + } + _metrics.back()->init(_first_module.get(), _second_module.get()); } +} - auto first_module = import(_ctx->first_model_path); - auto second_module = import(_ctx->second_model_path); +void CircleEvalDiff::evalDiff(void) const +{ + auto first_input_loader = circle_eval_diff::makeDataLoader( + _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get())); + auto second_input_loader = circle_eval_diff::makeDataLoader( + _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get())); - // Set runner - switch (_ctx->input_format) + for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++) { - case InputFormat::H5: - _runner = std::make_unique(std::move(first_module), std::move(second_module), - std::move(metric)); - break; - default: - throw std::runtime_error("Unsupported input format."); + std::cout << "Evaluating " << data_idx << "'th data" << std::endl; + + auto first_data = first_input_loader->get(data_idx); + auto second_data = second_input_loader->get(data_idx); + + auto first_output = interpret(_first_module.get(), 
first_data); + auto second_output = interpret(_second_module.get(), second_data); + + for (auto &metric : _metrics) + { + metric->accumulate(first_output, second_output); + } + + if (_ctx.get()->output_prefix.empty()) + continue; + + for (uint32_t i = 0; i < first_output.size(); i++) + { + auto out = first_output[i]; + writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + ".first.output" + + std::to_string(i), + (char *)(out->buffer()), out->byte_size()); + } + for (uint32_t i = 0; i < second_output.size(); i++) + { + auto out = second_output[i]; + writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + + ".second.output" + std::to_string(i), + (char *)(out->buffer()), out->byte_size()); + } } -} -void CircleEvalDiff::evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const -{ - _runner->evalDiff(first_input_data_path, second_input_data_path); + for (auto &metric : _metrics) + { + std::cout << metric.get() << std::endl; + } } } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp new file mode 100644 index 0000000..99276f3 --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "InputDataLoader.h" + +#include +#include +#include + +#include +#include +#include +#include + +using DataType = loco::DataType; +using Shape = std::vector; + +namespace circle_eval_diff +{ + +// Check the type and the shape of CircleInput +void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape) +{ + // Type check + if (dtype != input_node->dtype()) + throw std::runtime_error("Wrong input type."); + + if (shape.size() != input_node->rank()) + throw std::runtime_error("Input rank mismatch."); + + for (uint32_t i = 0; i < shape.size(); i++) + { + if (not(shape.at(i) == input_node->dim(i))) + throw std::runtime_error("Input shape mismatch."); + } +} + +std::vector getEachByteSizeOf(const std::vector &nodes) +{ + std::vector vec; + + for (const auto node : nodes) + { + const auto input_node = loco::must_cast(node); + size_t element_size = 1; + + for (uint32_t index = 0; index < input_node->rank(); index++) + { + element_size *= input_node->dim(index).value(); + } + + vec.push_back(element_size); + } + + return vec; +} + +size_t getTotalByteSizeOf(const std::vector &nodes) +{ + size_t total_byte_size = 0; + + for (const auto node : nodes) + { + const auto input_node = loco::must_cast(node); + size_t byte_size = loco::size(input_node->dtype()); + + for (uint32_t index = 0; index < input_node->rank(); index++) + { + byte_size *= input_node->dim(index).value(); + } + + total_byte_size += byte_size; + } + + return total_byte_size; +} + +} // namespace circle_eval_diff + +namespace circle_eval_diff +{ + +HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector &input_nodes) + : _input_nodes{input_nodes} +{ + try + { + using HDF5Importer = dio::hdf5::HDF5Importer; + + _hdf5 = std::make_unique(file_path); + _hdf5->importGroup("value"); + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + throw std::runtime_error("HDF5 error occurred."); + } +} + +uint32_t 
HDF5Loader::size(void) const { return _hdf5->numData(); } + +InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const +{ + Data data; + data.resize(_input_nodes.size()); + + for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++) + { + auto input_node = loco::must_cast(_input_nodes.at(input_idx)); + assert(input_node->index() == input_idx); + + data.at(input_idx) = *createEmptyTensor(input_node).get(); + + auto input_buffer = data.at(input_idx).buffer(); + try + { + if (_hdf5->isRawData()) + { + _hdf5->readTensor(data_idx, input_idx, input_buffer); + } + else + { + DataType dtype; + Shape shape; + _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer); + + // Check the type and the shape of the input data is valid + verifyTypeShape(input_node, dtype, shape); + } + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + throw std::runtime_error("HDF5 error occurred."); + } + } + + return data; +} + +DirectoryLoader::DirectoryLoader(const std::string &dir_path, + const std::vector &input_nodes) + : _input_nodes{input_nodes} +{ + DIR *dir = opendir(dir_path.c_str()); + if (not dir) + { + throw std::runtime_error("Cannot open directory \"" + dir_path + "\"."); + } + + struct dirent *entry = nullptr; + const auto input_total_bytes = getTotalByteSizeOf(input_nodes); + while (entry = readdir(dir)) + { + // Skip if the entry is not a regular file + if (entry->d_type != DT_REG) + continue; + + _data_paths.push_back(dir_path + "/" + entry->d_name); + } + + closedir(dir); +} + +uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); } + +InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const +{ + // Read raw data + const auto input_total_bytes = getTotalByteSizeOf(_input_nodes); + std::vector input_data(input_total_bytes); + const auto raw_data_path = _data_paths.at(data_idx); + std::ifstream fs(raw_data_path, std::ifstream::binary); + + if (fs.fail()) + { + throw 
std::runtime_error("Cannot open file \"" + raw_data_path + "\"."); + } + if (fs.read(input_data.data(), input_total_bytes).fail()) + { + throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\"."); + } + + // Make Tensor from raw data + auto input_data_cur = input_data.data(); + + Data data; + data.resize(_input_nodes.size()); + std::vector input_bytes = getEachByteSizeOf(_input_nodes); + for (uint32_t index = 0; index < _input_nodes.size(); index++) + { + const auto input_node = loco::must_cast(_input_nodes.at(index)); + auto &tensor = data.at(index); + tensor = *createEmptyTensor(input_node).get(); + auto buffer = tensor.buffer(); + std::memcpy(buffer, input_data_cur, input_bytes.at(index)); + input_data_cur += input_bytes.at(index); + } + + return data; +} + +std::unique_ptr makeDataLoader(const std::string &file_path, + const InputFormat &format, + const std::vector &input_nodes) +{ + switch (format) + { + case InputFormat::H5: + { + return std::make_unique(file_path, input_nodes); + } + case InputFormat::DIR: + { + return std::make_unique(file_path, input_nodes); + } + default: + throw std::runtime_error{"Unsupported input format."}; + } +} + +} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h new file mode 100644 index 0000000..14921b2 --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ +#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ + +#include +#include +#include + +#include "Tensor.h" + +#include +#include + +namespace circle_eval_diff +{ + +void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype, + const std::vector &shape); + +} // namespace circle_eval_diff + +namespace circle_eval_diff +{ + +enum class InputFormat +{ + Undefined, // For debugging + H5, + DIR, // directory + // TODO Implement Random, Directory +}; + +class InputDataLoader +{ +public: + using Data = std::vector; + +public: + virtual ~InputDataLoader() = default; + +public: + virtual uint32_t size(void) const = 0; + +public: + virtual Data get(uint32_t data_idx) const = 0; +}; + +class HDF5Loader final : public InputDataLoader +{ +public: + HDF5Loader(const std::string &file_path, const std::vector &input_nodes); + +public: + uint32_t size(void) const final; + Data get(uint32_t data_idx) const final; + +private: + const std::vector _input_nodes; + std::unique_ptr _hdf5; +}; + +// This class loads the directory that has raw data binary files. 
+class DirectoryLoader final : public InputDataLoader +{ +public: + DirectoryLoader(const std::string &dir_path, const std::vector &input_nodes); + +public: + uint32_t size(void) const final; + Data get(uint32_t data_idx) const final; + +private: + const std::vector _input_nodes; + std::vector _data_paths; +}; + +std::unique_ptr makeDataLoader(const std::string &file_path, + const InputFormat &format, + const std::vector &input_nodes); + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp new file mode 100644 index 0000000..cbe7879 --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include "InputDataLoader.h" + +using namespace circle_eval_diff; + +TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(3); + input.dim(2).set(3); + input.dim(3).set(2); + + loco::DataType right_data_type{loco::DataType::FLOAT32}; + std::vector right_shape; + right_shape.emplace_back(1); + right_shape.emplace_back(3); + right_shape.emplace_back(3); + right_shape.emplace_back(2); + + EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape)); +} + +TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(4); + input.dim(2).set(4); + input.dim(3).set(2); + + loco::DataType right_data_type{loco::DataType::FLOAT32}; + loco::DataType wrong_data_type{loco::DataType::FLOAT16}; + std::vector wrong_shape; + wrong_shape.emplace_back(1); + wrong_shape.emplace_back(3); + wrong_shape.emplace_back(3); + wrong_shape.emplace_back(2); + + EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape)); + EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape)); +} diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp index d65eb9b..ec84084 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.cpp +++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp @@ -18,6 +18,7 @@ #include +#include #include #include @@ -30,6 +31,16 @@ using Tensor = circle_eval_diff::Tensor; namespace { +uint32_t num_elems(const luci::CircleNode *node) +{ + uint32_t res = 1; + + for (uint32_t i = 0; i < node->rank(); i++) + res *= node->dim(i).value(); + + return res; +} + template bool same_shape(const T a, const T b) { if (a->rank() != b->rank()) @@ -44,6 +55,8 @@ template bool same_shape(const T a, const T b) return 
true; } +template bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); } + template std::shared_ptr to_fp32(const std::shared_ptr &tensor) { assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS @@ -97,7 +110,6 @@ void MAEPrinter::init(const luci::Module *first, const luci::Module *second) { const auto first_node = loco::must_cast(first_output[i]); const auto second_node = loco::must_cast(second_output[i]); - assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS // Create tensors to store intermediate results _intermediate.emplace_back(); @@ -180,6 +192,471 @@ void MAEPrinter::dump(std::ostream &os) const } } +// TODO Remove duplicate codes with MAEPrinter +void MAPEPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast(first_output[i]); + const auto second_node = loco::must_cast(second_output[i]); + + // Create tensors to store intermediate results + _intermediate.emplace_back(); + _intermediate.at(i).dtype(loco::DataType::FLOAT32); + // NOTE Use both first_node and second_node to avoid release build break + _intermediate.at(i).rank(first_node->rank()); + uint32_t num_elems = 1; + for (uint32_t j = 0; j < second_node->rank(); j++) + { + _intermediate.at(i).dim(j) = second_node->dim(j); + num_elems *= second_node->dim(j).value(); + } + _intermediate.at(i).size(num_elems); + + // Check the buffer is initilized with zero + for (uint32_t j = 0; j < num_elems; j++) + assert(_intermediate.at(i).at(j) == 0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +// 
Accumulate |(a - b) / a| +void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr &a, + const std::shared_ptr &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < a->size(); i++) + { + const auto a_val = a->at(i); + const auto b_val = b->at(i); + _intermediate.at(output_idx).at(i) += + std::abs((a_val - b_val) / a_val); + } +} + +// Assumption +// first: the result of fp32 model +// second: the result of fake-quantized model +void MAPEPrinter::accumulate(const std::vector> &first, + const std::vector> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 and then compute absolute error + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MAPEPrinter::dump(std::ostream &os) const +{ + os << "Mean Absolute Percentage Error (MAPE)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto &inter = _intermediate.at(output_idx); + assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS + const auto elem_count = inter.size(); + + // Compute MAPE + float mape = 0.0; + for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++) + mape += inter.at(elem_idx); + + mape = mape / elem_count; + mape = mape / _num_data; + mape *= 100.0; + + os << "MAPE 
for " << name << " is " << mape << "%" << std::endl; + } +} + +// TODO Remove duplicate codes with MAEPrinter +void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast(first_output[i]); + const auto second_node = loco::must_cast(second_output[i]); + + // Create places to store intermediate results + _intermediate.emplace_back(0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +// Accumulate PEIR (Peak Error to Interval Ratio) +// PEIR = max(|a - b|) / (max(a) - min(a)) +// PEIR >= 0 (lower is better) +void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr &a, + const std::shared_ptr &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + float min = std::numeric_limits::max(); + float max = std::numeric_limits::lowest(); + + for (uint32_t i = 0; i < a->size(); i++) + { + const auto a_val = a->at(i); + min = std::min(a_val, min); + max = std::max(a_val, max); + } + + float interval = max - min; + + // Corner case: All values are the same. 
We set interval = 1 in this case + if (interval == 0) + interval = 1.0; + + float peak_error = std::numeric_limits::lowest(); + + for (uint32_t i = 0; i < a->size(); i++) + { + const auto a_val = a->at(i); + const auto b_val = b->at(i); + const auto error = std::abs(a_val - b_val); + peak_error = std::max(error, peak_error); + } + + _intermediate.at(output_idx) += peak_error / interval; +} + +// Assumption (when testing the accuracy of quantized model) +// first: the result of fp32 model +// second: the result of fake-quantized model +void MPEIRPrinter::accumulate(const std::vector> &first, + const std::vector> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 for ease of computation + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_peir(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MPEIRPrinter::dump(std::ostream &os) const +{ + os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto sum_of_peir = _intermediate.at(output_idx); + + // Compute MPEIR + float mpeir = sum_of_peir / _num_data; + + os << "MPEIR for " << name << " is " << mpeir << std::endl; + } +} + +// TODO Remove duplicate codes with MAEPrinter +void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = 
loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast(first_output[i]); + const auto second_node = loco::must_cast(second_output[i]); + + // Create places to store intermediate results + _intermediate.emplace_back(0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + + // If num_elems of an output is less than k, + // the output index is added to the skip list + if (num_elems(first_node) < _k) + { + std::cout << "Top-" << _k << "metric for " << first_node->name() + << " is ignored, because it has elements less than " << _k << std::endl; + _skip_output.emplace_back(i); + } + } +} + +void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr &a, + const std::shared_ptr &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + // Find Top-k largest elements + // This implementation is a variant of "Method 2 (Use temporary array)" in + // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/ + // We sort top-k elements by value and index to ensure that the element with an earlier + // index comes first if multiple elements have the same value. + auto find_topk = [this](const std::shared_ptr &tensor) { + assert(_k <= tensor->size()); // FIX_CALLER_UNLESS + + // first: value, second: index + std::vector> topk; + topk.resize(_k); + + // Initialize + for (uint32_t i = 0; i < _k; i++) + { + topk[i] = std::make_pair(tensor->at(i), i); + } + + // Input pair: (value, index) + // Return true if a has smaller value than b. If a and b have the same value, + // return true if a has larger index. 
+ auto compare = [](const std::pair &a, const std::pair &b) { + if (a.first == b.first) + return a.second > b.second; + + return a.first < b.first; + }; + + for (uint32_t i = _k; i < tensor->size(); i++) + { + auto val = std::make_pair(tensor->at(i), i); + + auto min = std::min_element(topk.begin(), topk.end(), compare); + if (compare(*min, val)) + { + // val is larger than min. Replace min with val. + auto min_index = std::distance(topk.begin(), min); + topk[min_index] = val; + } + } + + return topk; + }; + + auto first_topk = find_topk(a); + auto second_topk = find_topk(b); + + uint32_t matched = 0; + for (uint32_t i = 0; i < _k; i++) + { + for (uint32_t j = 0; j < _k; j++) + { + if (first_topk[i].second == second_topk[j].second) + { + matched++; + break; + } + } + } + + float matched_ratio = static_cast(matched) / _k; + + _intermediate.at(output_idx) += matched_ratio; +} + +bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const +{ + for (auto skip : _skip_output) + { + if (output_index == skip) + return true; + } + + return false; +} + +void TopKMatchPrinter::accumulate(const std::vector> &first, + const std::vector> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + if (in_skip_list(output_idx)) + continue; + + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 for ease of computation + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void TopKMatchPrinter::dump(std::ostream &os) const +{ + os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); 
output_idx++) + { + if (in_skip_list(output_idx)) + continue; + + const auto name = _output_names.at(output_idx); + const auto sum_of_topk_accuracy = _intermediate.at(output_idx); + + // Compute TopKMatch + float mean_topk = sum_of_topk_accuracy / _num_data; + + os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << std::endl; + } +} + +void MSEPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast(first_output[i]); + const auto second_node = loco::must_cast(second_output[i]); + + // Create tensors to store intermediate results + _intermediate.emplace_back(); + _intermediate.at(i).dtype(loco::DataType::FLOAT32); + // NOTE Use both first_node and second_node to avoid release build break + _intermediate.at(i).rank(first_node->rank()); + uint32_t num_elems = 1; + for (uint32_t j = 0; j < second_node->rank(); j++) + { + _intermediate.at(i).dim(j) = second_node->dim(j); + num_elems *= second_node->dim(j).value(); + } + _intermediate.at(i).size(num_elems); + + // Check the buffer is initilized with zero + for (uint32_t j = 0; j < num_elems; j++) + assert(_intermediate.at(i).at(j) == 0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr &a, + const std::shared_ptr &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // 
FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < a->size(); i++) + { + _intermediate.at(output_idx).at(i) += + (a->at(i) - b->at(i)) * + (a->at(i) - b->at(i)); + } +} + +void MSEPrinter::accumulate(const std::vector> &first, + const std::vector> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 and then compute absolute error + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_squared_error(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MSEPrinter::dump(std::ostream &os) const +{ + os << "Mean Squared Error (MSE)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto &inter = _intermediate.at(output_idx); + assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS + const auto elem_count = inter.size(); + + // Compute MSE + float mse = 0.0; + for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++) + mse += inter.at(elem_idx); + + mse = mse / elem_count; + mse = mse / _num_data; + + os << "MSE for " << name << " is " << mse << std::endl; + } +} + } // namespace circle_eval_diff #undef THROW_UNLESS diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h index b51581c..c8f2751 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.h +++ b/compiler/circle-eval-diff/src/MetricPrinter.h @@ -85,6 +85,133 @@ private: uint32_t _num_data = 0; }; +// Mean Squared Error +class MSEPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void 
accumulate(const std::vector> &first, + const std::vector> &second); + + void dump(std::ostream &os) const; + +private: + void accum_squared_error(uint32_t index, const std::shared_ptr &a, + const std::shared_ptr &b); + +private: + // Store accumulated sum of absolute error for each output + std::vector _intermediate; + std::vector _output_names; + uint32_t _num_data = 0; +}; + +// Mean Absolute Percentage Error +class MAPEPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector> &first, + const std::vector> &second); + + void dump(std::ostream &os) const; + +private: + void accum_mean_absolute_error(uint32_t index, const std::shared_ptr &a, + const std::shared_ptr &b); + +private: + // Store accumulated sum of absolute error for each output + std::vector _intermediate; + std::vector _output_names; + uint32_t _num_data = 0; +}; + +// Mean Peak Error to Interval Ratio (PEIR) +// PEIR = max(|a - b|) / (max(a) - min(a)) +// PEIR >= 0 (lower is better) +// +// When testing the accuracy of quantized model, +// the first model should be the original fp32 model, and +// the second model should be the fake-quantized fp32 model +class MPEIRPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector> &first, + const std::vector> &second); + + void dump(std::ostream &os) const; + +private: + void accum_peir(uint32_t index, const std::shared_ptr &a, + const std::shared_ptr &b); + +private: + // Store accumulated sum of PEIR for each output + std::vector _intermediate; + std::vector _output_names; + uint32_t _num_data = 0; +}; + +// Ratio of matched indices between top-k results of two models (a, b). 
+// +// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k +// mean top-k match = sum(top-k match) / num_data +// +// For example, +// num_data = 2 +// first model output = [1, 2, 3], [2, 3, 1] +// second model output = [2, 4, 6], [3, 2, 1] +// +// if k = 1, +// first model top-1 index = ([2], [1]) +// second model top-1 index = ([2], [0]) +// mean top-1 accuracy = (1 + 0) / 2 = 0.5 +// +// if k = 2, +// first model output = [1, 2, 3], [2, 3, 1] +// second model output = [2, 4, 6], [3, 2, 1] +// first model top-2 index = ([2, 1], [1, 0]) +// second model top-2 index = ([2, 1], [0, 1]) +// mean top-2 accuracy = (2 + 2) / 4 = 1 +// +// NOTE Order of elements is ignored when comparing two top-k sets. +// NOTE If two elements have the same value and only one can be included in top-k, +// the one with an earlier index will be included. +class TopKMatchPrinter : public MetricPrinter +{ +public: + TopKMatchPrinter(uint32_t k) : _k(k) {} + +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector> &first, + const std::vector> &second); + + void dump(std::ostream &os) const; + +private: + void accum_topk_accuracy(uint32_t index, const std::shared_ptr &a, + const std::shared_ptr &b); + + // Return true if the output is in the skip list (_skip_output) + bool in_skip_list(uint32_t output_index) const; + +private: + const uint32_t _k = 0; + // Store accumulated accuracy + std::vector _intermediate; + std::vector _output_names; + uint32_t _num_data = 0; + // Save index of output whose num_elements is less than k + std::vector _skip_output; +}; + } // namespace circle_eval_diff #endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__ diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp index 51ca897..0e71b80 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp +++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp @@ -180,6 +180,23 @@ 
std::shared_ptr output_tensor_with_value(const luci::Module *module, flo return tensor; } +std::shared_ptr output_tensor_with_value(const luci::Module *module, + std::vector &value) +{ + auto outputs = loco::output_nodes(module->graph()); + assert(outputs.size() == 1); + auto output = *outputs.begin(); + auto output_cnode = loco::must_cast(output); + auto tensor = create_empty_tensor(output_cnode); + auto tensor_size = tensor->size(); + assert(tensor_size == value.size()); + for (uint32_t i = 0; i < tensor_size; i++) + { + tensor->at(i) = value[i]; + } + return tensor; +} + } // namespace namespace circle_eval_diff @@ -233,4 +250,299 @@ TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG) EXPECT_ANY_THROW(mae.init(nullptr, nullptr)); } +TEST(CircleEvalMetricPrinterTest, MAPE_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MAPEPrinter mape; + + mape.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector> first_result; + { + auto output = output_tensor_with_value(&first, 2.0); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + auto output = output_tensor_with_value(&second, 1.0); + second_result.emplace_back(output); + } + + mape.accumulate(first_result, second_result); + + std::stringstream ss; + mape.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%")); +} + +TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG) +{ + MAPEPrinter mape; + + EXPECT_ANY_THROW(mape.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, MPEIR_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MPEIRPrinter mpeir; + + mpeir.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector> first_result; + { + std::vector val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + auto output = output_tensor_with_value(&second, 0.0); + second_result.emplace_back(output); + } + + mpeir.accumulate(first_result, second_result); + + std::stringstream ss; + mpeir.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG) +{ + MPEIRPrinter mpeir; + + EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, TopK_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top5(5); + + top5.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector> first_result; + { + std::vector val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + std::vector val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i * 2; + auto output = output_tensor_with_value(&second, val); + second_result.emplace_back(output); + } + + top5.accumulate(first_result, second_result); + + std::stringstream ss; + top5.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_tie) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top5(5); + + top5.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector> first_result; + { + std::vector val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + std::vector val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16}; + + auto output = output_tensor_with_value(&second, val); + second_result.emplace_back(output); + } + + top5.accumulate(first_result, second_result); + + std::stringstream ss; + top5.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top100(100); + + top100.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector> first_result; + { + auto output = output_tensor_with_value(&first, 0); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + auto output = output_tensor_with_value(&second, 0); + second_result.emplace_back(output); + } + + top100.accumulate(first_result, second_result); + + std::stringstream ss; + top100.dump(ss); + std::string result = ss.str(); + + EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG) +{ + TopKMatchPrinter topk(5); + + EXPECT_ANY_THROW(topk.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, MSE_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MSEPrinter mse; + + mse.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. + std::vector> first_result; + { + auto output = output_tensor_with_value(&first, 1.0); + first_result.emplace_back(output); + } + + std::vector> second_result; + { + auto output = output_tensor_with_value(&second, 2.0); + second_result.emplace_back(output); + } + + mse.accumulate(first_result, second_result); + + std::stringstream ss; + mse.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG) +{ + MSEPrinter mse; + + EXPECT_ANY_THROW(mse.init(nullptr, nullptr)); +} + } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp deleted file mode 100644 index 85f9858..0000000 --- a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ModuleEvalDiff.h" -#include "Tensor.h" - -#include -#include - -#include -#include -#include -#include - -using Tensor = circle_eval_diff::Tensor; -using DataType = loco::DataType; -using Shape = std::vector; -using HDF5Importer = dio::hdf5::HDF5Importer; - -namespace -{ - -// Check the type and the shape of CircleInput -void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape) -{ - // Type check - if (dtype != input_node->dtype()) - throw std::runtime_error("Wrong input type."); - - if (shape.size() != input_node->rank()) - throw std::runtime_error("Input rank mismatch."); - - for (uint32_t i = 0; i < shape.size(); i++) - { - if (not(shape.at(i) == input_node->dim(i))) - throw std::runtime_error("Input shape mismatch."); - } -} - -// Return number of elements of the node. -uint32_t numElements(const luci::CircleNode *node) -{ - uint32_t num_elem = 1; - for (uint32_t i = 0; i < node->rank(); ++i) - num_elem *= node->dim(i).value(); - return num_elem; -} - -// Return Tensor which has the same dtype and shape with node. -// Buffer does not have any data yet. 
-std::shared_ptr createEmptyTensor(const luci::CircleNode *node) -{ - auto tensor = std::make_shared(); - { - tensor->dtype(node->dtype()); - tensor->rank(node->rank()); - for (uint32_t i = 0; i < node->rank(); i++) - tensor->dim(i) = node->dim(i); - - switch (node->dtype()) - { - case loco::DataType::FLOAT32: - tensor->size(numElements(node)); - break; - case loco::DataType::U8: - tensor->size(numElements(node)); - break; - case loco::DataType::S16: - tensor->size(numElements(node)); - break; - case loco::DataType::S32: - tensor->size(numElements(node)); - break; - case loco::DataType::S64: - tensor->size(numElements(node)); - break; - default: - throw std::runtime_error("Unsupported input tensor dtype for " + node->name()); - } - } - - return tensor; -} - -} // namespace - -namespace circle_eval_diff -{ - -void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const -{ - const auto interp = std::make_unique(_first_module.get()); - - _metric->init(_first_module.get(), _second_module.get()); - - try - { - HDF5Importer first_h5(first_input_data_path); - first_h5.importGroup("value"); - - HDF5Importer second_h5(second_input_data_path); - second_h5.importGroup("value"); - - const auto first_num_data = first_h5.numData(); - const auto second_num_data = second_h5.numData(); - - if (first_num_data != second_num_data) - throw std::runtime_error( - "Number of data in the first data file and the second data file mismatches."); - - if (first_num_data == 0) - throw std::runtime_error("Input data file does not contain any record."); - - const auto first_input_nodes = loco::input_nodes(_first_module->graph()); - const auto first_num_inputs = first_input_nodes.size(); - const auto first_output_nodes = loco::output_nodes(_first_module->graph()); - const auto first_num_outputs = first_output_nodes.size(); - - const auto second_input_nodes = loco::input_nodes(_second_module->graph()); - const auto second_num_inputs = 
second_input_nodes.size(); - const auto second_output_nodes = loco::output_nodes(_second_module->graph()); - const auto second_num_outputs = second_output_nodes.size(); - - for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++) - { - std::cout << "Evaluating " << data_idx << "'th data" << std::endl; - - if (first_num_inputs != first_h5.numInputs(data_idx) || - second_num_inputs != second_h5.numInputs(data_idx)) - throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) + - "th data."); - - // Do inference and return output - auto eval = [&](HDF5Importer &h5, uint32_t num_inputs, - const std::vector &input_nodes, uint32_t num_outputs, - const std::vector &output_nodes) { - // Write input data - for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++) - { - const auto *input_node = - loco::must_cast(input_nodes[input_idx]); - assert(input_node->index() == input_idx); - - auto tensor = createEmptyTensor(input_node); - if (h5.isRawData()) - { - h5.readTensor(data_idx, input_idx, tensor->buffer()); - } - else - { - DataType dtype; - Shape shape; - h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer()); - - // Check the type and the shape of the input data is valid - verifyTypeShape(input_node, dtype, shape); - } - - interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size()); - } - - // Interpret - interp->interpret(); - - // Read output data - std::vector> outputs; - for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++) - { - const auto *output_node = - loco::must_cast(output_nodes[output_idx]); - assert(output_node->index() == output_idx); - - auto tensor = createEmptyTensor(output_node); - interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size()); - outputs.emplace_back(tensor); - } - - return outputs; - }; - - auto first_output = - eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes); - auto second_output = 
eval(second_h5, second_num_inputs, second_input_nodes, - second_num_outputs, second_output_nodes); - - // Accumulate diffs - _metric->accumulate(first_output, second_output); - } - - std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl; - } - catch (const H5::Exception &e) - { - H5::Exception::printErrorStack(); - throw std::runtime_error("HDF5 error occurred."); - } - - // Print metric - std::cout << _metric.get() << std::endl; -} - -} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h deleted file mode 100644 index c7642f6..0000000 --- a/compiler/circle-eval-diff/src/ModuleEvalDiff.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ -#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ - -#include "MetricPrinter.h" - -#include - -#include - -namespace circle_eval_diff -{ - -class ModuleEvalDiff -{ -public: - ModuleEvalDiff(std::unique_ptr &&first, std::unique_ptr &&second, - std::unique_ptr &&metric) - : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric)) - { - } - - virtual ~ModuleEvalDiff() = default; - - // Implement this in the child class - virtual void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const = 0; - -protected: - std::unique_ptr _first_module; - std::unique_ptr _second_module; - std::unique_ptr _metric; -}; - -class H5InputEvalDiff final : public ModuleEvalDiff -{ -public: - H5InputEvalDiff(std::unique_ptr &&first, std::unique_ptr &&second, - std::unique_ptr &&metric) - : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric)) - { - } - - void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const; -}; - -// TODO Implement ModuleEvalDiff for random input and directory input - -} // namespace circle_eval_diff - -#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp index 6710e8c..c3efc44 100644 --- a/compiler/circle-eval-diff/src/Tensor.cpp +++ b/compiler/circle-eval-diff/src/Tensor.cpp @@ -16,8 +16,24 @@ #include "Tensor.h" +#include + #include +namespace +{ + +// Return number of elements of the node. +uint32_t numElements(const luci::CircleNode *node) +{ + uint32_t num_elem = 1; + for (uint32_t i = 0; i < node->rank(); ++i) + num_elem *= node->dim(i).value(); + return num_elem; +} + +} // namespace + namespace circle_eval_diff { @@ -69,4 +85,40 @@ INSTANTIATE(loco::DataType::FLOAT32); #undef INSTANTIATE +// Return Tensor which has the same dtype and shape with node. 
+// Buffer does not have any data yet. +std::shared_ptr createEmptyTensor(const luci::CircleNode *node) +{ + auto tensor = std::make_shared(); + { + tensor->dtype(node->dtype()); + tensor->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + tensor->dim(i) = node->dim(i); + + switch (node->dtype()) + { + case loco::DataType::FLOAT32: + tensor->size(numElements(node)); + break; + case loco::DataType::U8: + tensor->size(numElements(node)); + break; + case loco::DataType::S16: + tensor->size(numElements(node)); + break; + case loco::DataType::S32: + tensor->size(numElements(node)); + break; + case loco::DataType::S64: + tensor->size(numElements(node)); + break; + default: + throw std::runtime_error("Unsupported input tensor dtype for " + node->name()); + } + } + + return tensor; +} + } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h index 65ab606..d4f65d9 100644 --- a/compiler/circle-eval-diff/src/Tensor.h +++ b/compiler/circle-eval-diff/src/Tensor.h @@ -18,6 +18,7 @@ #define __CIRCLE_EVAL_DIFF_TENSOR_H__ #include +#include #include @@ -76,6 +77,8 @@ private: std::vector _data; }; +std::shared_ptr createEmptyTensor(const luci::CircleNode *node); + } // namespace circle_eval_diff #endif // __CIRCLE_EVAL_DIFF_TENSOR_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp index 3bdeaec..3958657 100644 --- a/compiler/circle-eval-diff/src/Tensor.test.cpp +++ b/compiler/circle-eval-diff/src/Tensor.test.cpp @@ -18,6 +18,8 @@ #include +#include + using Tensor = circle_eval_diff::Tensor; namespace @@ -99,3 +101,29 @@ TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG) SUCCEED(); } + +TEST(CircleEvalDiffTensorTest, createEmptyTensorTest) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(3); + input.dim(2).set(3); + input.dim(3).set(2); + + loco::DataType 
right_data_type{loco::DataType::FLOAT32}; + std::vector right_shape; + right_shape.emplace_back(1); + right_shape.emplace_back(3); + right_shape.emplace_back(3); + right_shape.emplace_back(2); + + auto tensor = circle_eval_diff::createEmptyTensor(&input); + EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype()); + EXPECT_EQ(4, tensor->rank()); + EXPECT_EQ(1, tensor->dim(0)); + EXPECT_EQ(3, tensor->dim(1)); + EXPECT_EQ(3, tensor->dim(2)); + EXPECT_EQ(2, tensor->dim(3)); +} diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt index 2f657c1..da74e02 100644 --- a/compiler/circle-execution-plan/CMakeLists.txt +++ b/compiler/circle-execution-plan/CMakeLists.txt @@ -1,3 +1,9 @@ +nnas_find_package(Jsoncpp) +if(NOT Jsoncpp_FOUND) + message(STATUS "Build circle-execution-plan: FAILED (missing jsoncpp)") + return() +endif(NOT Jsoncpp_FOUND) + set(SOURCES pal/IScratchpadHelper.h pal/ScratchpadHelperLinux.h @@ -10,6 +16,9 @@ set(SOURCES ) add_executable(circle_execution_plan "${SOURCES}") +target_include_directories(circle_execution_plan PRIVATE ${Jsoncpp_INCLUDE_DIRS}) + +target_link_libraries(circle_execution_plan ${Jsoncpp_STATIC_LIB}) target_link_libraries(circle_execution_plan foder) target_link_libraries(circle_execution_plan safemain) target_link_libraries(circle_execution_plan luci_env) diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp index 1788124..d5ddf0c 100644 --- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp +++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp @@ -33,20 +33,22 @@ int entry(int argc, char **argv) { arser::Arser arser("circle_execution_plan provides model with execution plan meta information"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); - 
arser.add_argument("--platform") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .default_value("linux") - .help("Platform name: linux mcu cmsisnn"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); + arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn"); arser.add_argument("--use_dsp") .nargs(1) .type(arser::DataType::BOOL) .required(false) .default_value(false) .help("Plan with or without dsp (now can be used only with cmsisnn)"); + arser.add_argument("--save_allocations") + .nargs(1) + .required(false) + .default_value("") + .help("Path for output JSON file to save memory allocation info. " + "Note: path end of file should have 'tracealloc.json' (example path: " + "'../exec_plan_info.tracealloc.json')"); try { @@ -63,6 +65,7 @@ int entry(int argc, char **argv) const std::string output_path = arser.get("output"); const std::string platform_name = arser.get("--platform"); const bool use_dsp = arser.get("--use_dsp"); + const std::string json_path = arser.get("--save_allocations"); if (platform_name != "cmsisnn" && use_dsp) { @@ -89,6 +92,13 @@ int entry(int argc, char **argv) return EXIT_FAILURE; } + bool is_save_allocations = false; + + if (!json_path.empty()) + { + is_save_allocations = true; + } + foder::FileLoader file_loader{input_path}; std::vector model_data; @@ -124,6 +134,9 @@ int entry(int argc, char **argv) circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp}); execution_planner.make_execution_plan(); + if (is_save_allocations) + execution_planner.create_json_allocation_file(json_path); + // Export to output Circle file luci::CircleExporter exporter; luci::CircleFileExpContract contract(module.get(), output_path); diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp index ec2ec13..a1e6f7e 100644 --- 
a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp @@ -18,6 +18,9 @@ #include #include +#include +#include + namespace circle_planner { namespace @@ -58,6 +61,29 @@ bool isTensorProducingNode(const luci::CircleNode *node) } } +// Create allocation node part for current circle node for json allocation info file +void create_allocation_node(Json::Value &allocations_node, + AllocationNodeInformation &alloca_node_inform, uint32_t alive_till_max, + luci::CircleNode *circle_node) +{ + Json::Value allocation_node; + if (alloca_node_inform.size == 0) + return; + + allocation_node["offset"] = alloca_node_inform.offset; + allocation_node["size"] = alloca_node_inform.size; + allocation_node["alive_from"] = alloca_node_inform.first_node; + + if (alloca_node_inform.last_node == node_not_assigned) + allocation_node["alive_till"] = alive_till_max + 1; + else + allocation_node["alive_till"] = alloca_node_inform.last_node; + + allocation_node["origin"] = circle_node->name(); + + allocations_node.append(allocation_node); +} + } // namespace void ExecutionPlanner::make_execution_plan() @@ -74,6 +100,50 @@ void ExecutionPlanner::make_execution_plan() settings->set(luci::UserSettings::Key::ExecutionPlanGen, true); } +void ExecutionPlanner::create_json_allocation_file(const std::string &json_path) +{ + Json::Value main_tree; + Json::Value segments_node; + Json::Value allocations_node; + + uint32_t alive_till_max = 0; + + // Find max dealloc value to assign to nodes with node_not_assigned value + for (const auto elem : _dealloc_node) + { + if (alive_till_max < elem and elem != node_not_assigned) + alive_till_max = elem; + } + + for (auto &alloc_node_inform : _alloc_node_inform_vector) + { + const auto node_num = alloc_node_inform.node_num; + const auto circle_node = loco::must_cast(_ordered_nodes[node_num]); + + create_allocation_node(allocations_node, alloc_node_inform, alive_till_max, circle_node); + } + + // Create 
segment part + Json::Value segment_node; + segment_node["name"] = "Segment1"; + segment_node["allocations"] = allocations_node; + segments_node.append(segment_node); + + main_tree["schema_version"] = 1; + main_tree["segments"] = segments_node; + + Json::StreamWriterBuilder builder; + const std::unique_ptr writer(builder.newStreamWriter()); + + // Write to json file + std::ofstream out; + out.open(json_path); + if (out.is_open()) + { + writer->write(main_tree, &out); + } +} + void ExecutionPlanner::get_default_execution_order_plan() { // Get execution order in _ordered_nodes diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h index e0833c4..af3fba3 100644 --- a/compiler/circle-execution-plan/src/ExecutionPlanner.h +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h @@ -104,6 +104,8 @@ public: _is_null_scratchpads = is_null_scratchpads; }; + void create_json_allocation_file(const std::string &json_path); + private: // Method gets default execution order plan and saves it in _ordered_nodes vector. // There can be different variants of execution order and this method provides main one. 
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp index 10e185d..318a582 100644 --- a/compiler/circle-inspect/driver/Driver.cpp +++ b/compiler/circle-inspect/driver/Driver.cpp @@ -36,7 +36,7 @@ int entry(int argc, char **argv) .help("Dump Conv2D series weight operators in circle file"); arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file"); arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors"); - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect"); + arser.add_argument("circle").help("Circle file to inspect"); try { diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake index 362d67c..183dfe2 100644 --- a/compiler/circle-inspect/requires.cmake +++ b/compiler/circle-inspect/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-circle04") require("safemain") diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp index bba5e56..aa8fed2 100644 --- a/compiler/circle-inspect/src/Dump.cpp +++ b/compiler/circle-inspect/src/Dump.cpp @@ -15,7 +15,9 @@ */ #include "Dump.h" -#include "Reader.h" + +#include +#include #include @@ -24,7 +26,7 @@ namespace circleinspect void DumpOperators::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); @@ -50,7 +52,7 @@ void DumpOperators::run(std::ostream &os, const circle::Model *model) namespace { -const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor) +const circle::Operator *operator_match_output(mio::circle::Reader &reader, const int32_t tensor) { auto ops = reader.operators(); @@ -58,7 +60,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con { const auto op = ops->Get(i); - const 
std::vector &outputs = circleinspect::as_index_vector(op->outputs()); + const std::vector &outputs = mio::circle::as_index_vector(op->outputs()); for (auto output : outputs) { @@ -69,7 +71,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con return nullptr; } -size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id) +size_t tensor_buffer_size(mio::circle::Reader &reader, const int32_t tensor_id) { auto tensors = reader.tensors(); @@ -93,7 +95,7 @@ namespace circleinspect void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); @@ -110,7 +112,7 @@ void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model) if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D) { - const std::vector &inputs = circleinspect::as_index_vector(op->inputs()); + const std::vector &inputs = mio::circle::as_index_vector(op->inputs()); if (inputs.size() < 2) { throw std::runtime_error("Operator has invalid input"); @@ -147,7 +149,7 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model) { std::map op_version_map; - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); // This assert is subject to be changed later assert(reader.num_subgraph() == 1); @@ -181,7 +183,7 @@ namespace circleinspect void DumpTensorDType::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp deleted file mode 100644 index 0e28652..0000000 --- a/compiler/circle-inspect/src/Reader.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Reader.h" - -#include - -#include -#include - -namespace circleinspect -{ - -Reader::Reader(const circle::Model *model) -{ - _subgraphs = model->subgraphs(); - _buffers = model->buffers(); - - auto opcodes = model->operator_codes(); - for (const ::circle::OperatorCode *opcode : *opcodes) - { - _op_codes.push_back(opcode); - } -} - -size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) -{ - if (buff_data != nullptr) - { - *buff_data = nullptr; - } - - if (buf_idx == 0) - return 0; - - if (auto *buffer = (*_buffers)[buf_idx]) - { - if (auto *array = buffer->data()) - { - if (size_t size = array->size()) - { - if (buff_data != nullptr) - { - *buff_data = reinterpret_cast(array->data()); - } - return size; - } - } - } - - return 0; -} - -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - return mio::circle::builtin_code_neutral(opcode); -} - -std::string Reader::opcode_name(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - if (!mio::circle::is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } - - return 
mio::circle::opcode_name(opcode); -} - -std::string Reader::tensor_name(const circle::Tensor *tensor) const -{ - return mio::circle::tensor_name(tensor); -} - -std::string Reader::tensor_dtype(const circle::Tensor *tensor) const -{ - return mio::circle::tensor_type(tensor); -} - -bool Reader::select_subgraph(uint32_t sgindex) -{ - _tensors = nullptr; - _operators = nullptr; - - _inputs.clear(); - _outputs.clear(); - - if (_subgraphs->Length() <= sgindex) - { - assert(false); - return false; - } - - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; - - _tensors = subgraph->tensors(); - _operators = subgraph->operators(); - - _inputs = as_index_vector(subgraph->inputs()); - _outputs = as_index_vector(subgraph->outputs()); - - return true; -} - -} // namespace circleinspect diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h deleted file mode 100644 index c38ec39..0000000 --- a/compiler/circle-inspect/src/Reader.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __READER_H__ -#define __READER_H__ - -#include - -#include -#include -#include - -namespace circleinspect -{ - -template std::vector as_index_vector(const flatbuffers::Vector *flat_array) -{ - std::vector ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} - -/** - * @brief Loads Circle file and provides helpers to access attributes - */ -class Reader -{ -private: - using CircleSubGraphs_t = flatbuffers::Vector>; - using CircleBuffers_t = flatbuffers::Vector>; - using CircleTensors_t = flatbuffers::Vector>; - using CircleOperators_t = flatbuffers::Vector>; - -public: - Reader(const circle::Model *model); - - Reader() = delete; - -public: - const std::vector &opcodes() { return _op_codes; } - const CircleBuffers_t *buffers() { return _buffers; } - const CircleTensors_t *tensors() { return _tensors; } - const CircleOperators_t *operators() { return _operators; } - const std::vector &inputs() const { return _inputs; } - const std::vector &outputs() const { return _outputs; } - - uint32_t num_subgraph() const { return _subgraphs->Length(); } - - size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; - std::string tensor_name(const circle::Tensor *tensor) const; - std::string tensor_dtype(const circle::Tensor *tensor) const; - -public: - bool select_subgraph(uint32_t subgraph); - -private: - const CircleSubGraphs_t *_subgraphs{nullptr}; - const CircleBuffers_t *_buffers{nullptr}; - const CircleTensors_t *_tensors{nullptr}; - const CircleOperators_t *_operators{nullptr}; - - std::vector _op_codes; - std::vector _inputs; - std::vector _outputs; -}; - -} // namespace circleinspect - -#endif // __READER_H__ diff --git a/compiler/circle-interpreter/CMakeLists.txt b/compiler/circle-interpreter/CMakeLists.txt new file mode 100644 index 
0000000..d18db3e --- /dev/null +++ b/compiler/circle-interpreter/CMakeLists.txt @@ -0,0 +1,13 @@ +set(INTERPRETER + src/CircleInterpreter.cpp + ) + +add_executable(circle-interpreter ${INTERPRETER}) +target_link_libraries(circle-interpreter PRIVATE arser) +target_link_libraries(circle-interpreter PRIVATE loco) +target_link_libraries(circle-interpreter PRIVATE luci_import) +target_link_libraries(circle-interpreter PRIVATE luci_interpreter) +target_link_libraries(circle-interpreter PRIVATE safemain) +target_link_libraries(circle-interpreter PRIVATE vconone) + +install(TARGETS circle-interpreter DESTINATION bin) diff --git a/compiler/circle-interpreter/requires.cmake b/compiler/circle-interpreter/requires.cmake new file mode 100644 index 0000000..a565df6 --- /dev/null +++ b/compiler/circle-interpreter/requires.cmake @@ -0,0 +1,6 @@ +require("arser") +require("loco") +require("luci") +require("luci-interpreter") +require("safemain") +require("vconone") diff --git a/compiler/circle-interpreter/src/CircleInterpreter.cpp b/compiler/circle-interpreter/src/CircleInterpreter.cpp new file mode 100644 index 0000000..1d24127 --- /dev/null +++ b/compiler/circle-interpreter/src/CircleInterpreter.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace +{ + +void readDataFromFile(const std::string &filename, char *data, size_t data_size) +{ + std::ifstream fs(filename, std::ifstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.read(data, data_size).fail()) + throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n"); +} + +void writeDataToFile(const std::string &filename, const char *data, size_t data_size) +{ + std::ofstream fs(filename, std::ofstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.write(data, data_size).fail()) + { + throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n"); + } +} + +template size_t getTensorSize(const NodeT *node) +{ + size_t tensor_size = loco::size(node->dtype()); + for (uint32_t i = 0; i < node->rank(); ++i) + tensor_size *= node->dim(i).value(); + return tensor_size; +} + +void print_version(void) +{ + std::cout << "circle-interpreter version " << vconone::get_string() << std::endl; + std::cout << vconone::get_copyright() << std::endl; +} + +} // namespace + +/* + * @brief CircleInterpreter main + * + * Driver to invoke luci-interpreter + * + */ +int entry(int argc, char **argv) +{ + arser::Arser arser("Interpreter driver for circle models"); + + arser::Helper::add_version(arser, print_version); + + arser.add_argument("model_path").help("Circle model filepath"); + arser.add_argument("input_prefix") + .help("Input data filepath for circle model. " + "n-th input data is read from ${input_prefix}n, " + "for example, Add.circle.input0, Add.circle.input1"); + arser.add_argument("output_prefix") + .help("Output data filepath for circle model. 
" + "Output data is written in ${output_prefix}n, " + "for example, Add.circle.output0"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cout << err.what() << std::endl; + std::cout << arser; + return EXIT_FAILURE; + } + + const auto filename = arser.get("model_path"); + const auto input_prefix = arser.get("input_prefix"); + const auto output_prefix = arser.get("output_prefix"); + + // Load model from the file + luci::ImporterEx importer; + std::unique_ptr module = importer.importVerifyModule(filename); + if (module == nullptr) + { + std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl; + return EXIT_FAILURE; + } + + // Create interpreter. + luci_interpreter::Interpreter interpreter(module.get()); + + // Set input. + // Data for n'th input is read from ${input_prefix}n + // (ex: Add.circle.input0, Add.circle.input1 ..) + const auto input_nodes = loco::input_nodes(module->graph()); + for (uint32_t i = 0; i < input_nodes.size(); i++) + { + const auto *input_node = loco::must_cast(input_nodes[i]); + std::vector input_data(getTensorSize(input_node)); + readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(), + input_data.size()); + interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + } + + // Do inference. + interpreter.interpret(); + + // Get output. 
+ const auto output_nodes = loco::output_nodes(module->graph()); + for (uint32_t i = 0; i < output_nodes.size(); i++) + { + const auto *output_node = loco::must_cast(output_nodes[i]); + std::vector output_data(getTensorSize(output_node)); + interpreter.readOutputTensor(output_node, output_data.data(), output_data.size()); + + // Output data is written in ${output_prefix}n + // (ex: Add.circle.output0) + writeDataToFile(std::string(output_prefix) + std::to_string(i), output_data.data(), + output_data.size()); + } + return EXIT_SUCCESS; +} diff --git a/compiler/circle-operator-test/CMakeLists.txt b/compiler/circle-operator-test/CMakeLists.txt new file mode 100644 index 0000000..2ebd533 --- /dev/null +++ b/compiler/circle-operator-test/CMakeLists.txt @@ -0,0 +1,18 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR) +get_target_property(CIRCLE_OPERATOR_PATH circle-operator BINARY_DIR) +set(CIRCLE_OPERATOR_PATH "${CIRCLE_OPERATOR_PATH}/circle-operator") + +nnas_find_package(GTest REQUIRED) + +file(GLOB_RECURSE TESTS "src/*.test.cpp") + +GTest_AddTest(circle-operator-test ${TESTS}) + +set_tests_properties(circle-operator-test + PROPERTIES + ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_OPERATOR_PATH=${CIRCLE_OPERATOR_PATH}" + ) diff --git a/compiler/circle-operator-test/README.md b/compiler/circle-operator-test/README.md new file mode 100644 index 0000000..d07c64d --- /dev/null +++ b/compiler/circle-operator-test/README.md @@ -0,0 +1,7 @@ +# circle-operator-test + +_circle-operator-test_ provides tests that check the circle-operator tool is working as expected. 
+ +Current tests include +- input arguments are handled as expected +- output of this tool is as expected diff --git a/compiler/circle-operator-test/requires.cmake b/compiler/circle-operator-test/requires.cmake new file mode 100644 index 0000000..8ad3b8a --- /dev/null +++ b/compiler/circle-operator-test/requires.cmake @@ -0,0 +1,2 @@ +require("circle-operator") +require("common-artifacts") diff --git a/compiler/circle-operator-test/src/circle-operator.test.cpp b/compiler/circle-operator-test/src/circle-operator.test.cpp new file mode 100644 index 0000000..29c6f37 --- /dev/null +++ b/compiler/circle-operator-test/src/circle-operator.test.cpp @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +class cirlce_operator_test : public ::testing::Test +{ +protected: + bool initialize(void); + bool run(const std::string &command); + +protected: + bool load(const std::string &file); + +protected: + std::string _artifacts_path; + std::string _circle_operator_path; + std::string _result; +}; + +bool cirlce_operator_test::initialize(void) +{ + char *path = std::getenv("ARTIFACTS_PATH"); + if (path == nullptr) + { + std::cerr << "ARTIFACTS_PATH not found" << std::endl; + return false; + } + _artifacts_path = path; + + path = std::getenv("CIRCLE_OPERATOR_PATH"); + if (path == nullptr) + { + std::cerr << "ARTIFACTS_BIN_PATH not found" << std::endl; + return false; + } + _circle_operator_path = path; + + return true; +} + +bool cirlce_operator_test::run(const std::string &command) +{ + std::vector buffer(260); + std::string result = ""; + std::string cmd_err = command + " 2>&1"; + FILE *pipe = popen(cmd_err.c_str(), "r"); + if (!pipe) + { + return false; + } + try + { + while (fgets(&buffer[0], buffer.size(), pipe) != NULL) + { + result += &buffer[0]; + } + } + catch (...) 
+ { + pclose(pipe); + return false; + } + pclose(pipe); + _result = result; + + std::cout << _result << std::endl; + + return true; +} + +bool cirlce_operator_test::load(const std::string &file) +{ + std::ifstream tmp(file.c_str()); + if (tmp.fail()) + return false; + + std::stringstream buffer; + buffer << tmp.rdbuf(); + _result = buffer.str(); + return true; +} + +TEST_F(cirlce_operator_test, valid_names) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ofm"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, valid_codes) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, invalid_option_NEG) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --opname " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("Invalid argument"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, check_code_name) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ofm"); + ASSERT_NE(std::string::npos, pos); + const auto pos2 = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos2); +} + +TEST_F(cirlce_operator_test, nonexist_file_NEG) +{ + if (!initialize()) + { + FAIL(); + 
return; + } + + std::string model = _artifacts_path + "/non_exist_file.foo"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ERROR"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, invalid_file_NEG) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.recipe"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ERROR"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, output_file) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string fileName("/tmp/a.txt"); + std::remove(fileName.c_str()); + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code --output_path " + fileName + " " + model; + if (!run(command)) + { + FAIL(); + return; + } + if (!load(fileName)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos); +} diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt new file mode 100644 index 0000000..6817a86 --- /dev/null +++ b/compiler/circle-operator/CMakeLists.txt @@ -0,0 +1,17 @@ +if(NOT TARGET mio_circle04) + return() +endif(NOT TARGET mio_circle04) + +set(DRIVER "driver/Driver.cpp") + +file(GLOB_RECURSE SOURCES "src/*.cpp") + +add_executable(circle-operator ${DRIVER} ${SOURCES}) +target_include_directories(circle-operator PRIVATE src) +target_link_libraries(circle-operator arser) +target_link_libraries(circle-operator foder) +target_link_libraries(circle-operator mio_circle04) +target_link_libraries(circle-operator mio_circle04_helper) +target_link_libraries(circle-operator safemain) + +install(TARGETS circle-operator DESTINATION bin) diff --git a/compiler/circle-operator/README.md 
b/compiler/circle-operator/README.md new file mode 100644 index 0000000..86a923f --- /dev/null +++ b/compiler/circle-operator/README.md @@ -0,0 +1,70 @@ +# circle-operator + +_circle-operator_ allows users to retrieve operator information from a Circle model file + +NOTE: this tool is primarily for ONE-vscode where PartEditor needs names and codes +of the operators. + +## Information with operators + +Operators with `--name` +- show operator names one line at a time in execution order + +Example +``` +$ circle-operator --name model.circle +``` + +Result +``` +conv1_pad/Pad +conv1_conv/BiasAdd +pool1_pad/Pad +``` + +Operator codes with `--code` +- show operator codes one line at a time in execution order + +Example +``` +$ circle-operator --code model.circle +``` + +Result +``` +PAD +CONV_2D +PAD +``` + +Operators with both `--code` and `--name` +- show both operator codes and names, separated by `,`, one line at a time in execution order + +Example +``` +$ circle-operator --code --name model.circle +``` + +Result +``` +PAD,conv1_pad/Pad +CONV_2D,conv1_conv/BiasAdd +PAD,pool1_pad/Pad +``` + +## Save to file + +Use `--output_path` to save results to a file. + +Example +``` +$ circle-operator --name --output_path /tmp/result model.circle +``` + +Result +``` +$ cat /tmp/result +conv1_pad/Pad +conv1_conv/BiasAdd +pool1_pad/Pad +``` diff --git a/compiler/circle-operator/driver/Driver.cpp b/compiler/circle-operator/driver/Driver.cpp new file mode 100644 index 0000000..f5fd807 --- /dev/null +++ b/compiler/circle-operator/driver/Driver.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dump.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +void handle_segfault(int signal, siginfo_t *si, void *arg) +{ + std::cerr << "ERROR: Failed to load file" << std::endl; + exit(255); +} + +int entry(int argc, char **argv) +{ + // TODO add option to dump for all sub-graphs + arser::Arser arser{ + "circle-operator allows users to retrieve operator information from a Circle model file"}; + arser.add_argument("--name").nargs(0).help("Dump operators name in circle file"); + arser.add_argument("--code").nargs(0).help("Dump operators code in circle file"); + arser.add_argument("--output_path").help("Save output to file (default output is console)"); + arser.add_argument("circle").help("Circle file to dump"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + std::cerr << arser; + return 255; + } + + cirops::DumpOption option; + option.names = arser["--name"]; + option.codes = arser["--code"]; + + std::ofstream oFstream; + std::ostream *oStream = &std::cout; + if (arser["--output_path"]) + { + auto output_path = arser.get("--output_path"); + oFstream.open(output_path, std::ofstream::out | std::ofstream::trunc); + if (oFstream.fail()) + { + std::cerr << "ERROR: Failed to create output to file " << output_path << std::endl; + return 255; + } + oStream = &oFstream; + } + + // hook segment fault + struct sigaction sa; + memset(&sa, 0, sizeof(struct sigaction)); + sigemptyset(&sa.sa_mask); + 
sa.sa_sigaction = handle_segfault; + sa.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &sa, NULL); + + std::string modelFile = arser.get("circle"); + // Load Circle model from a circle file + try + { + foder::FileLoader fileLoader{modelFile}; + std::vector modelData = fileLoader.load(); + const circle::Model *circleModel = circle::GetModel(modelData.data()); + if (circleModel == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << modelFile << "'" << std::endl; + return 255; + } + cirops::DumpOperators dump; + dump.run(*oStream, circleModel, option); + } + catch (const std::runtime_error &err) + { + std::cerr << "ERROR: " << err.what() << std::endl; + return 255; + } + + if (oFstream.is_open()) + { + oFstream.close(); + } + + return 0; +} diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake new file mode 100644 index 0000000..183dfe2 --- /dev/null +++ b/compiler/circle-operator/requires.cmake @@ -0,0 +1,4 @@ +require("arser") +require("foder") +require("mio-circle04") +require("safemain") diff --git a/compiler/circle-operator/src/Dump.cpp b/compiler/circle-operator/src/Dump.cpp new file mode 100644 index 0000000..36bfe86 --- /dev/null +++ b/compiler/circle-operator/src/Dump.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Dump.h" + +#include +#include + +#include + +namespace +{ + +void dump_ops(std::ostream &os, mio::circle::Reader &reader, const cirops::DumpOption &option) +{ + auto ops = reader.operators(); + for (uint32_t i = 0; i < ops->Length(); ++i) + { + const auto op = ops->Get(i); + const auto op_name = reader.opcode_name(op); + + if (option.all_graphs) + { + // NOTE all_graphs is false for now + // TODO check using '$' as split key + os << i << "$"; + } + + if (option.codes) + { + const auto op_name = reader.opcode_name(op); + os << op_name; + } + if (option.names) + { + // TODO multiple outputs? + const auto tensors = reader.tensors(); + const auto output_tensors = reader.outputs(op); + const auto output = output_tensors.at(0); + const auto tensor = tensors->Get(output); + const std::string name = mio::circle::tensor_name(tensor); + if (option.codes) + { + os << ","; + } + os << name; + } + os << std::endl; + } +} + +} // namespace + +namespace cirops +{ + +void DumpOperators::run(std::ostream &os, const circle::Model *model, const DumpOption &option) +{ + mio::circle::Reader reader(model); + + const uint32_t subgraph_size = reader.num_subgraph(); + for (uint32_t g = 0; g < subgraph_size; g++) + { + reader.select_subgraph(g); + dump_ops(os, reader, option); + + if (!option.all_graphs) + break; + } +} + +} // namespace cirops diff --git a/compiler/circle-operator/src/Dump.h b/compiler/circle-operator/src/Dump.h new file mode 100644 index 0000000..aa1d1be --- /dev/null +++ b/compiler/circle-operator/src/Dump.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DUMP_H__ +#define __DUMP_H__ + +#include + +#include + +namespace cirops +{ + +struct DumpOption +{ + bool names = false; + bool codes = false; + bool all_graphs = false; +}; + +class DumpOperators +{ +public: + DumpOperators() = default; + +public: + void run(std::ostream &os, const circle::Model *model, const DumpOption &option); +}; + +} // namespace cirops + +#endif // __DUMP_H__ diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp index a1ace4f..4b39a6d 100644 --- a/compiler/circle-opselector/driver/Driver.cpp +++ b/compiler/circle-opselector/driver/Driver.cpp @@ -159,26 +159,16 @@ int entry(int argc, char **argv) arser::Arser arser("circle-opselector provides selecting operations in circle model"); - arser.add_argument("--version") - .nargs(0) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); + arser::Helper::add_version(arser, print_version); // TODO Add new options! 
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); // select option - arser.add_argument("--by_id") - .nargs(1) - .type(arser::DataType::STR) - .help("Input operation id to select nodes."); - arser.add_argument("--by_name") - .nargs(1) - .type(arser::DataType::STR) - .help("Input operation name to select nodes."); + arser.add_argument("--by_id").help("Input operation id to select nodes."); + arser.add_argument("--by_name").help("Input operation name to select nodes."); try { diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt index 0657607..ffe1b89 100644 --- a/compiler/circle-part-value-test/CMakeLists.txt +++ b/compiler/circle-part-value-test/CMakeLists.txt @@ -82,7 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON} - COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}" + COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file" + "${PARTITION_NAME}.circle" "--work_path" "${PARTITIONER_OUTPUT_PATH}" DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt index e29a66b..7b26b3b 100644 --- a/compiler/circle-partitioner-test/CMakeLists.txt +++ b/compiler/circle-partitioner-test/CMakeLists.txt @@ -57,7 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json") add_custom_command(OUTPUT ${PART_CONN_JSON} - COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}" + 
COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file" + "${PART_NAME}.circle" "--work_path" "${PART_OUT_PATH}" DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt index 9b8f5af..abc5d93 100644 --- a/compiler/circle-partitioner/CMakeLists.txt +++ b/compiler/circle-partitioner/CMakeLists.txt @@ -1,7 +1,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circle-partitioner "${SOURCES}") -target_link_libraries(circle-partitioner foder) target_link_libraries(circle-partitioner crew) target_link_libraries(circle-partitioner safemain) target_link_libraries(circle-partitioner luci_lang) @@ -17,22 +16,3 @@ target_link_libraries(circle-partitioner vconone) target_link_libraries(circle-partitioner nncc_common) install(TARGETS circle-partitioner DESTINATION bin) - -# TODO remove circle_partitioner -add_executable(circle_partitioner "${SOURCES}") -target_link_libraries(circle_partitioner foder) -target_link_libraries(circle_partitioner crew) -target_link_libraries(circle_partitioner safemain) -target_link_libraries(circle_partitioner luci_lang) -target_link_libraries(circle_partitioner luci_log) -target_link_libraries(circle_partitioner luci_import) -target_link_libraries(circle_partitioner luci_service) -target_link_libraries(circle_partitioner luci_pass) -target_link_libraries(circle_partitioner luci_export) -target_link_libraries(circle_partitioner luci_partition) -target_link_libraries(circle_partitioner arser) -target_link_libraries(circle_partitioner pepper_csv2vec) -target_link_libraries(circle_partitioner vconone) -target_link_libraries(circle_partitioner nncc_common) - -install(TARGETS circle_partitioner DESTINATION bin) diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md index 2e0a986..760cf28 100644 --- 
a/compiler/circle-partitioner/README.md +++ b/compiler/circle-partitioner/README.md @@ -4,10 +4,10 @@ _circle-partitioner_ provides model partitioning of circle model to two or more ## How circle-partitioner work -_circle-partitioner_ requires 3 positional arguments -- first: `partition` file -- second: `input` circle model file -- third: `work` folder +_circle-partitioner_ takes 3 arguments for input files +- `--part_file`: `partition` file, use extension `.part` +- `--input_file`: `input` circle model file +- `--work_path`: `work` path where input files reside. This is optional and defaults to the CWD if omitted And options to override `partition` file as a helper to try out without editing `partition` file. - `--backends`: override `backends` of `[partition]` section @@ -20,7 +20,7 @@ are read from `work` folder. Outputs are (1) one or more partitioned circle models and (2) connection file that gives how the partitioned models should be connected to act like the source `input` model. -Why does input files be placed in `work` folder too? +Why do input files have to be placed in the `work` path too?
- this is still work in progress condition - use cases are still ambigious - original `input` model file can be used by the backend, so `.conn` file links it as `source` @@ -94,7 +94,8 @@ Net_InstanceNorm_003/ Command example ``` -./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003 +./circle-partitioner --part_file Net_InstanceNorm_003.part \ +--input_file Net_InstanceNorm_003.circle --work_path Net_InstanceNorm_003 ``` Result of _circle-partitioner_ @@ -171,11 +172,11 @@ Consider partitioning with backends of OneRT Let's try with this command: ``` -circle_partitioner \ - --partition Net_InstanceNorm_003.part \ - --backends cpu,acl_cl \ - --default cpu \ - Net_InstanceNorm_003.circle Net_InstanceNorm_003 +circle-partitioner \ + --backends cpu,acl_cl --default cpu \ + --part_file Net_InstanceNorm_003.part \ + --input_file Net_InstanceNorm_003.circle \ + --work_path Net_InstanceNorm_003 ``` where `Net_InstanceNorm_003.part` is like this for initial design diff --git a/compiler/circle-partitioner/requires.cmake b/compiler/circle-partitioner/requires.cmake index 690d953..82d9c2b 100644 --- a/compiler/circle-partitioner/requires.cmake +++ b/compiler/circle-partitioner/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("crew") require("pepper-csv2vec") require("safemain") diff --git a/compiler/circle-partitioner/src/CirclePartitioner.cpp b/compiler/circle-partitioner/src/CirclePartitioner.cpp index 0151e92..5cecb9a 100644 --- a/compiler/circle-partitioner/src/CirclePartitioner.cpp +++ b/compiler/circle-partitioner/src/CirclePartitioner.cpp @@ -18,9 +18,7 @@ #include "PartitionExport.h" #include "HelperPath.h" -#include - -#include +#include #include #include #include @@ -41,9 +39,9 @@ namespace const char *opt_bks = "--backends"; const char *opt_def = "--default"; -const char *opt_part = "partition"; -const char *opt_input = "input"; -const char *opt_work = "work"; +const char *opt_part_file = "--part_file"; +const char
*opt_input_file = "--input_file"; +const char *opt_work_path = "--work_path"; void print_version(void) { @@ -53,63 +51,25 @@ void print_version(void) void build_arser(arser::Arser &arser) { - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument(opt_bks) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Backends in CSV to use for partitioning"); - - arser.add_argument(opt_def) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Default backend to assign"); - - arser.add_argument(opt_part) - .nargs(1) - .type(arser::DataType::STR) + arser::Helper::add_version(arser, print_version); + + arser.add_argument(opt_bks).help("Backends in CSV to use for partitioning"); + + arser.add_argument(opt_def).help("Default backend to assign"); + + arser.add_argument(opt_part_file) + .required(true) .help("Partition file which provides backend to assign"); - arser.add_argument(opt_input) - .nargs(1) - .type(arser::DataType::STR) - .help("Input circle model filename"); - arser.add_argument(opt_work) - .nargs(1) - .type(arser::DataType::STR) + arser.add_argument(opt_input_file).required(true).help("Input circle model filename"); + arser.add_argument(opt_work_path) .help("Work folder of partition, input files exist and output files are produced"); } std::unique_ptr load_model(const std::string &input_path) { - // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector model_data = file_loader.load(); - - // Verify flatbuffers - flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; - return nullptr; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: 
Failed to load circle '" << input_path << "'" << std::endl; - return nullptr; - } - // Import from input Circle file - luci::Importer importer; - return importer.importModule(circle_model); + luci::ImporterEx importerex; + return importerex.importVerifyModule(input_path); } } // namespace @@ -133,9 +93,14 @@ int entry(int argc, char **argv) return EXIT_FAILURE; } - std::string partition_file = arser.get(opt_part); - std::string input_file = arser.get(opt_input); - std::string work_folder = arser.get(opt_work); + std::string partition_file = arser.get(opt_part_file); + std::string input_file = arser.get(opt_input_file); + std::string work_folder = "."; + + if (arser[opt_work_path]) + { + work_folder = arser.get(opt_work_path); + } std::string partition_path = work_folder + "/" + partition_file; std::string input_path = work_folder + "/" + input_file; diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt index 5ec8b6e..a3a2902 100644 --- a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt +++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt @@ -18,7 +18,7 @@ unset(TEST_NAMES) get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) set(options USE_QCONFIG) -set(oneValueArgs DTYPE GRANULARITY) +set(oneValueArgs DTYPE GRANULARITY INPUT_DTYPE OUTPUT_DTYPE) set(multiValueArgs "") macro(Add RECIPE) @@ -29,6 +29,16 @@ macro(Add RECIPE) set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json") endif() + set(INPUT_DTYPE_OPT "") + if(ARG_INPUT_DTYPE) + set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}") + endif() + + set(OUTPUT_DTYPE_OPT "") + if(ARG_OUTPUT_DTYPE) + set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}") + endif() + set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle") set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle") set(RECORDED_CIRCLE_PATH 
"${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle") @@ -38,7 +48,10 @@ macro(Add RECIPE) add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH} COMMAND $ --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH} COMMAND $ --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH} - COMMAND $ --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH} + COMMAND $ + --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} + ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH} + ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT} DEPENDS circle-quantizer record-minmax diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst index 1881030..58f89c7 100644 --- a/compiler/circle-quantizer-dredd-recipe-test/test.lst +++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst @@ -6,10 +6,75 @@ ## TFLITE RECIPE +# MPQ Test (default: u8, target: s16) +Add(Quant_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_AveragePool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_BatchMatMul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Concatenation_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_003 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_DepthwiseConv2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_FullyConnected_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_LeakyRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Logistic_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_MaxPool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mean_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Neg_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Pad_000 DTYPE uint8 
GRANULARITY channel USE_QCONFIG) +Add(Quant_PRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU6_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Reshape_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeBilinear_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeNearestNeighbor_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Slice_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Softmax_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Tanh_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Transpose_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_TransposeConv_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) + +# MPQ Test (default: s16, target: u8) +Add(Quant_Add_002 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_AveragePool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_BatchMatMul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Concatenation_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_004 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_DepthwiseConv2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_FullyConnected_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_LeakyRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Logistic_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_MaxPool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mean_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Neg_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Pad_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_PRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU6_001 DTYPE int16 GRANULARITY channel 
USE_QCONFIG) +Add(Quant_Reshape_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeBilinear_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeNearestNeighbor_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Slice_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Softmax_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Tanh_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Transpose_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_TransposeConv_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) + Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_000 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32) +Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32) +Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32) AddFakeQuant(Quant_Add_000) + +## CIRCLE RECIPE + +# MPQ Test (default: u8, target: s16) +Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) + +# MPQ Test (default: s16, target: u8) +Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt index 14e0097..16e41a3 100644 --- a/compiler/circle-quantizer/CMakeLists.txt +++ b/compiler/circle-quantizer/CMakeLists.txt @@ -10,7 +10,6 @@ add_executable(circle-quantizer "${SOURCES}") target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS}) target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB}) -target_link_libraries(circle-quantizer foder) target_link_libraries(circle-quantizer safemain) 
target_link_libraries(circle-quantizer oops) target_link_libraries(circle-quantizer loco) diff --git a/compiler/circle-quantizer/requires.cmake b/compiler/circle-quantizer/requires.cmake index c21e28e..4fcee18 100644 --- a/compiler/circle-quantizer/requires.cmake +++ b/compiler/circle-quantizer/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("loco") require("locop") require("safemain") diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index e0c85cb..f1e31ed 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -14,9 +14,7 @@ * limitations under the License. */ -#include - -#include +#include #include #include #include @@ -59,13 +57,31 @@ std::vector> read_layer_params(std::string &filename std::vector> p; for (auto layer : layers) { - auto l = std::make_shared(); + if (layer.isMember("name")) { - l->name = layer["name"].asString(); - l->dtype = layer["dtype"].asString(); - l->granularity = layer["granularity"].asString(); + auto l = std::make_shared(); + { + l->name = layer["name"].asString(); + l->dtype = layer["dtype"].asString(); + l->granularity = layer["granularity"].asString(); + } + p.emplace_back(l); + } + + // Multiple names with the same dtype & granularity + if (layer.isMember("names")) + { + for (auto name : layer["names"]) + { + auto l = std::make_shared(); + { + l->name = name.asString(); + l->dtype = layer["dtype"].asString(); + l->granularity = layer["granularity"].asString(); + } + p.emplace_back(l); + } } - p.emplace_back(l); } return p; @@ -109,23 +125,12 @@ int entry(int argc, char **argv) arser::Arser arser("circle-quantizer provides circle model quantization"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - 
.default_value(false) - .help("output additional information to stdout or stderr"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); arser.add_argument(qdqw) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .help("Quantize-dequantize weight values required action before quantization. " "Three arguments required: input_model_dtype(float32) " "output_model_dtype(uint8) granularity(layer, channel)"); @@ -133,28 +138,24 @@ int entry(int argc, char **argv) arser.add_argument(qwmm) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .help("Quantize with min/max values. " "Three arguments required: input_model_dtype(float32) " "output_model_dtype(uint8) granularity(layer, channel)"); arser.add_argument(tf_maxpool) .nargs(0) - .required(false) .default_value(false) .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can " "degrade accuracy of some models"); arser.add_argument(fake_quant) .nargs(0) - .required(false) .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will " "generate an fp32 model."); arser.add_argument(rq) .nargs(2) .type(arser::DataType::STR_VEC) - .required(false) .help("Requantize a quantized model. " "Two arguments required: input_model_dtype(int8) " "output_model_dtype(uint8)"); @@ -162,7 +163,6 @@ int entry(int argc, char **argv) arser.add_argument(fq) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .accumulated(true) .help("Write quantization parameters to the specified tensor. " "Three arguments required: tensor_name(string), " @@ -171,32 +171,21 @@ int entry(int argc, char **argv) arser.add_argument(cq) .nargs(2) .type(arser::DataType::STR_VEC) - .required(false) .accumulated(true) .help("Copy quantization parameter from a tensor to another tensor." 
"Two arguments required: source_tensor_name(string), " "destination_tensor_name(string)"); arser.add_argument("--input_type") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Input type of quantized model (uint8 or int16)"); + .help("Input type of quantized model (uint8, int16, or float32)"); arser.add_argument("--output_type") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Output type of quantized model (uint8 or int16)"); + .help("Output type of quantized model (uint8, int16, or float32)"); - arser.add_argument(cfg) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Path to the quantization configuration file"); + arser.add_argument(cfg).help("Path to the quantization configuration file"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); arser.add_argument(gpd).nargs(0).required(false).default_value(false).help( "This will turn on profiling data generation."); @@ -384,27 +373,10 @@ int entry(int argc, char **argv) settings->set(luci::UserSettings::Key::ProfilingDataGen, true); // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector model_data = file_loader.load(); - - // Verify flatbuffers - flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; + luci::ImporterEx importerex; + auto module = importerex.importVerifyModule(input_path); + if 
(module.get() == nullptr) return EXIT_FAILURE; - } - - // Import from input Circle file - luci::Importer importer; - auto module = importer.importModule(circle_model); for (size_t idx = 0; idx < module->size(); ++idx) { diff --git a/compiler/circle-tensordump/driver/Driver.cpp b/compiler/circle-tensordump/driver/Driver.cpp index 70f3c8d..c32dc3f 100644 --- a/compiler/circle-tensordump/driver/Driver.cpp +++ b/compiler/circle-tensordump/driver/Driver.cpp @@ -31,11 +31,9 @@ int entry(int argc, char **argv) arser::Arser arser{ "circle-tensordump allows users to retrieve tensor information from a Circle model file"}; - arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump"); + arser.add_argument("circle").help("Circle file path to dump"); arser.add_argument("--tensors").nargs(0).help("Dump to console"); arser.add_argument("--tensors_to_hdf5") - .nargs(1) - .type(arser::DataType::STR) .help("Dump to hdf5 file. Specify hdf5 file path to be dumped"); try diff --git a/compiler/circle-tensordump/src/Dump.cpp b/compiler/circle-tensordump/src/Dump.cpp index e477a74..49afa73 100644 --- a/compiler/circle-tensordump/src/Dump.cpp +++ b/compiler/circle-tensordump/src/Dump.cpp @@ -15,7 +15,8 @@ */ #include "Dump.h" -#include "Reader.h" + +#include #include @@ -102,7 +103,7 @@ namespace circletensordump void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &) { - circletensordump::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); auto buffers = reader.buffers(); @@ -296,7 +297,7 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model, const std::string &output_path) { // loads a circle model - circletensordump::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); // create a hdf5 file diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp 
deleted file mode 100644 index 47b8760..0000000 --- a/compiler/circle-tensordump/src/Reader.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Reader.h" - -#include - -#include -#include - -namespace circletensordump -{ - -Reader::Reader(const circle::Model *model) -{ - _subgraphs = model->subgraphs(); - _buffers = model->buffers(); - - auto opcodes = model->operator_codes(); - for (const ::circle::OperatorCode *opcode : *opcodes) - { - _op_codes.push_back(opcode); - } -} - -size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) -{ - if (buff_data != nullptr) - { - *buff_data = nullptr; - } - - if (buf_idx == 0) - return 0; - - if (auto *buffer = (*_buffers)[buf_idx]) - { - if (auto *array = buffer->data()) - { - if (size_t size = array->size()) - { - if (buff_data != nullptr) - { - *buff_data = reinterpret_cast(array->data()); - } - return size; - } - } - } - - return 0; -} - -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - return mio::circle::builtin_code_neutral(opcode); -} - -std::string Reader::opcode_name(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const 
circle::OperatorCode *opcode = _op_codes.at(index); - - if (!mio::circle::is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } - - return mio::circle::opcode_name(opcode); -} - -bool Reader::select_subgraph(uint32_t sgindex) -{ - _tensors = nullptr; - _operators = nullptr; - - _inputs.clear(); - _outputs.clear(); - - if (_subgraphs->Length() <= sgindex) - { - assert(false); - return false; - } - - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; - - _tensors = subgraph->tensors(); - _operators = subgraph->operators(); - - _inputs = as_index_vector(subgraph->inputs()); - _outputs = as_index_vector(subgraph->outputs()); - - return true; -} - -} // namespace circletensordump diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h deleted file mode 100644 index c868bc2..0000000 --- a/compiler/circle-tensordump/src/Reader.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CIRCLE_TENSORDUMP_READER_H__ -#define __CIRCLE_TENSORDUMP_READER_H__ - -#include - -#include -#include -#include - -namespace circletensordump -{ - -template std::vector as_index_vector(const flatbuffers::Vector *flat_array) -{ - std::vector ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} - -/** - * @brief Loads Circle file and provides helpers to access attributes - */ -class Reader -{ -private: - using CircleSubGraphs_t = flatbuffers::Vector>; - using CircleBuffers_t = flatbuffers::Vector>; - using CircleTensors_t = flatbuffers::Vector>; - using CircleOperators_t = flatbuffers::Vector>; - -public: - Reader(const circle::Model *model); - - Reader() = delete; - -public: - const std::vector &opcodes() { return _op_codes; } - const CircleBuffers_t *buffers() { return _buffers; } - const CircleTensors_t *tensors() { return _tensors; } - const CircleOperators_t *operators() { return _operators; } - const std::vector &inputs() const { return _inputs; } - const std::vector &outputs() const { return _outputs; } - - uint32_t num_subgraph() const { return _subgraphs->Length(); } - - size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; - -public: - bool select_subgraph(uint32_t subgraph); - -private: - const CircleSubGraphs_t *_subgraphs{nullptr}; - const CircleBuffers_t *_buffers{nullptr}; - const CircleTensors_t *_tensors{nullptr}; - const CircleOperators_t *_operators{nullptr}; - - std::vector _op_codes; - std::vector _inputs; - std::vector _outputs; -}; - -} // namespace circletensordump - -#endif // __CIRCLE_TENSORDUMP_READER_H__ diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp index 7a44c65..c3a4147 100644 --- a/compiler/circle-verify/src/Driver.cpp +++ 
b/compiler/circle-verify/src/Driver.cpp @@ -25,7 +25,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to verify"); + arser.add_argument("circle").help("Circle file path to verify"); try { diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index f41aac3..a6f2786 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -31,6 +31,8 @@ Add(Net_TConv_Add_002 PASS fuse_add_with_tconv) Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv) Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv) Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv) +Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv) +Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv) Add(Net_InstanceNorm_001 PASS fuse_instnorm) Add(Net_InstanceNorm_003 PASS fuse_instnorm) Add(Net_InstanceNorm_004 PASS fuse_instnorm) @@ -46,6 +48,7 @@ Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape) Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax) Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax) Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax) +Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul) ## CIRCLE RECIPE diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt index cd79967..dbe485b 100644 --- a/compiler/circle2circle/CMakeLists.txt +++ b/compiler/circle2circle/CMakeLists.txt @@ -4,7 +4,6 @@ list(REMOVE_ITEM SOURCES ${TESTS}) add_executable(circle2circle "${SOURCES}") target_include_directories(circle2circle PRIVATE src) -target_link_libraries(circle2circle foder) target_link_libraries(circle2circle nncc_common) target_link_libraries(circle2circle safemain) target_link_libraries(circle2circle oops) @@ -29,7 +28,6 @@ nnas_find_package(GTest REQUIRED) 
GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES}) target_include_directories(circle2circle_test PRIVATE src) -target_link_libraries(circle2circle_test foder) target_link_libraries(circle2circle_test nncc_common) target_link_libraries(circle2circle_test oops) target_link_libraries(circle2circle_test hermes) diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake index b6c6119..4e5ed0d 100644 --- a/compiler/circle2circle/requires.cmake +++ b/compiler/circle2circle/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("loco") require("locop") require("logo-core") diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index ae677a3..f5cf0d7 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -14,9 +14,7 @@ * limitations under the License. */ -#include - -#include +#include #include #include #include @@ -54,6 +52,11 @@ void csv_tokenize(const std::string &data, std::vector &result) result.push_back(token); } +void add_switch(arser::Arser &arser, const char *opt, const char *desc) +{ + arser.add_argument(opt).nargs(0).default_value(false).help(desc); +} + int entry(int argc, char **argv) { // Simple argument parser (based on map) @@ -64,368 +67,125 @@ int entry(int argc, char **argv) arser::Arser arser("circle2circle provides circle model optimization and transformations"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); - - arser.add_argument("--O1").nargs(0).required(false).default_value(false).help( - "Enable O1 optimize options"); - - arser.add_argument("--fold_add_v2") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold 
AddV2 operators with constant inputs"); - - arser.add_argument("--fold_cast") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Cast operators with constant input"); - - arser.add_argument("--fold_dequantize") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold dequantize op"); - - arser.add_argument("--fold_dwconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Depthwise Convolution operator with constant inputs"); - - arser.add_argument("--fold_gather") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Gather operator"); - - arser.add_argument("--fold_sparse_to_dense") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold SparseToDense operator"); - - arser.add_argument("--forward_reshape_to_unaryop") - .nargs(0) - .required(false) - .default_value(false) - .help("This will move Reshape after UnaryOp for centain condition"); - - arser.add_argument("--fuse_activation_function") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Activation function to a preceding operator"); - - arser.add_argument("--fuse_add_with_fully_connected") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Add operator to FullyConnected operator"); - - arser.add_argument("--fuse_add_with_tconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Add operator to Transposed Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_conv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators to Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_dwconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators to Depthwise Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_tconv") - .nargs(0) - .required(false) - .default_value(false) - 
.help("This will fuse BatchNorm operators to Transposed Convolution operator"); - - arser.add_argument("--fuse_bcq") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse operators and apply Binary Coded Quantization"); - - arser.add_argument("--fuse_instnorm") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse operators to InstanceNorm operator"); - - arser.add_argument("--fuse_mean_with_mean") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse two Mean operations when they follow one by one." - "This will fold them into one operation and merge reduction indices."); - - arser.add_argument("--fuse_transpose_with_mean") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Mean operation with a preceding Transpose under certain conditions."); - - arser.add_argument("--make_batchnorm_gamma_positive") - .nargs(0) - .required(false) - .default_value(false) - .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note " - "that this pass can change the execution result of the model. 
So, use it only when the " - "impact is known to be acceptable."); - - arser.add_argument("--fuse_preactivation_batchnorm") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators of pre-activations to Convolution operator"); - - arser.add_argument("--remove_fakequant") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove FakeQuant operators"); - - arser.add_argument("--remove_quantdequant") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove Quantize-Dequantize sequence"); - - arser.add_argument("--remove_redundant_quantize") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove redundant Quantize operators"); - - arser.add_argument("--remove_redundant_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse or remove subsequent Reshape operators"); - - arser.add_argument("--remove_redundant_transpose") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse or remove subsequent Transpose operators"); - - arser.add_argument("--remove_unnecessary_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary reshape operators"); - - arser.add_argument("--remove_unnecessary_slice") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary slice operators"); - - arser.add_argument("--remove_unnecessary_strided_slice") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary strided slice operators"); - - arser.add_argument("--remove_unnecessary_split") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary split operators"); - - arser.add_argument("--replace_cw_mul_add_with_depthwise_conv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will replace channel-wise mul/add with DepthwiseConv2D operator"); - - 
arser.add_argument("--replace_sub_with_add") - .nargs(0) - .required(false) - .default_value(false) - .help("This will replace sub with add operator"); - - arser.add_argument("--resolve_customop_add") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(Add) to Add operator"); - - arser.add_argument("--resolve_customop_batchmatmul") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(BatchMatmul) to BatchMatmul operator"); - - arser.add_argument("--resolve_customop_matmul") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(Matmul) to Matmul operator"); - - arser.add_argument("--resolve_customop_max_pool_with_argmax") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators"); - - arser.add_argument("--shuffle_weight_to_16x1float32") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. 
Note that " - "it only converts weights whose row is a multiple of 16"); - - arser.add_argument("--substitute_pack_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert single input Pack to Reshape"); - - arser.add_argument("--substitute_padv2_to_pad") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition PadV2 to Pad"); - - arser.add_argument("--substitute_splitv_to_split") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition SplitV to Split operator"); - - arser.add_argument("--substitute_squeeze_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition Squeeze to Reshape"); - - arser.add_argument("--substitute_strided_slice_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition Strided_Slice to Reshape"); - - arser.add_argument("--substitute_transpose_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert single input Transpose to Reshape"); - - arser.add_argument("--expand_broadcast_const") - .nargs(0) - .required(false) - .default_value(false) - .help("This will expand broadcastable constant inputs"); - - arser.add_argument("--convert_nchw_to_nhwc") - .nargs(0) - .required(false) - .default_value(false) - .help("Experimental: This will convert NCHW operators to NHWC under the assumption that " - "input model is NCHW."); - - arser.add_argument("--nchw_to_nhwc_input_shape") - .nargs(0) - .required(false) - .default_value(false) - .help("Convert the input shape of the model (argument for --convert_nchw_to_nhwc)."); - - arser.add_argument("--nchw_to_nhwc_output_shape") - .nargs(0) - .required(false) - .default_value(false) - .help("Convert the output shape of the model (argument for --convert_nchw_to_nhwc)."); - - arser.add_argument("--transform_min_max_to_relu6") - .nargs(0) - 
.required(false) - .default_value(false) - .help("Transform Minimum(6)-Maximum(0) pattern to Relu6 operator"); - - arser.add_argument("--transform_min_relu_to_relu6") - .nargs(0) - .required(false) - .default_value(false) - .help("Transform Minimum(6)-Relu pattern to Relu6 operator"); - - arser.add_argument("--mute_warnings") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn off warning messages"); - - arser.add_argument("--disable_validation") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn off operator validations. May help input model investigation."); - - arser.add_argument("--generate_profile_data") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn on profiling data generation."); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); + + add_switch(arser, "--fold_add_v2", "This will fold AddV2 operators with constant inputs"); + add_switch(arser, "--fold_cast", "This will fold Cast operators with constant input"); + add_switch(arser, "--fold_densify", + "This will fold Densify operators with sparse constant input"); + add_switch(arser, "--fold_dequantize", "This will fold dequantize op"); + add_switch(arser, "--fold_dwconv", + "This will fold Depthwise Convolution operator with constant inputs"); + add_switch(arser, "--fold_gather", "This will fold Gather operator"); + add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator"); + add_switch(arser, "--forward_reshape_to_unaryop", + "This will move Reshape after UnaryOp for centain condition"); + add_switch(arser, "--fuse_activation_function", + "This will fuse Activation function to a preceding operator"); + add_switch(arser, "--fuse_add_with_fully_connected", + "This will fuse Add operator to FullyConnected operator"); + add_switch(arser, "--fuse_add_with_tconv", + "This will fuse Add operator to Transposed Convolution operator"); + add_switch(arser, 
"--fuse_batchnorm_with_conv", + "This will fuse BatchNorm operators to Convolution operator"); + add_switch(arser, "--fuse_batchnorm_with_dwconv", + "This will fuse BatchNorm operators to Depthwise Convolution operator"); + add_switch(arser, "--fuse_batchnorm_with_tconv", + "This will fuse BatchNorm operators to Transposed Convolution operator"); + add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization"); + add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator"); + add_switch(arser, "--fuse_mean_with_mean", + "This will fuse two Mean operations when they follow one by one. This will fold them " + "into one operation and merge reduction indices."); + add_switch(arser, "--fuse_transpose_with_mean", + "This will fuse Mean operation with a preceding Transpose under certain conditions."); + add_switch(arser, "--make_batchnorm_gamma_positive", + "This will make negative gamma of BatchNorm into a small positive value (1e-10). " + "Note that this pass can change the execution result of the model. 
So, use it only " + "when the impact is known to be acceptable."); + add_switch(arser, "--fuse_preactivation_batchnorm", + "This will fuse BatchNorm operators of pre-activations to Convolution operator"); + add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators"); + add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence"); + add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators"); + add_switch(arser, "--remove_redundant_reshape", + "This will fuse or remove subsequent Reshape operators"); + add_switch(arser, "--remove_redundant_transpose", + "This will fuse or remove subsequent Transpose operators"); + add_switch(arser, "--remove_unnecessary_reshape", + "This will remove unnecessary reshape operators"); + add_switch(arser, "--remove_unnecessary_slice", "This will remove unnecessary slice operators"); + add_switch(arser, "--remove_unnecessary_strided_slice", + "This will remove unnecessary strided slice operators"); + add_switch(arser, "--remove_unnecessary_split", "This will remove unnecessary split operators"); + add_switch(arser, "--replace_cw_mul_add_with_depthwise_conv", + "This will replace channel-wise mul/add with DepthwiseConv2D operator"); + add_switch(arser, "--replace_sub_with_add", "This will replace sub with add operator"); + add_switch(arser, "--resolve_customop_add", "This will convert Custom(Add) to Add operator"); + add_switch(arser, "--resolve_customop_batchmatmul", + "This will convert Custom(BatchMatmul) to BatchMatmul operator"); + add_switch(arser, "--resolve_customop_matmul", + "This will convert Custom(Matmul) to Matmul operator"); + add_switch(arser, "--resolve_customop_max_pool_with_argmax", + "This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators"); + add_switch(arser, "--resolve_customop_splitv", + "This will convert Custom(SplitV) to SplitV operator"); + add_switch(arser, "--shuffle_weight_to_16x1float32", + "This 
will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that " + "it only converts weights whose row is a multiple of 16"); + add_switch(arser, "--replace_non_const_fc_with_batch_matmul", + "Replace FullyConnected with BatchMatMul when its weight is non-constant"); + add_switch(arser, "--substitute_pack_to_reshape", + "This will convert single input Pack to Reshape"); + add_switch(arser, "--substitute_padv2_to_pad", + "This will convert certain condition PadV2 to Pad"); + add_switch(arser, "--substitute_splitv_to_split", + "This will convert certain condition SplitV to Split operator"); + add_switch(arser, "--substitute_squeeze_to_reshape", + "This will convert certain condition Squeeze to Reshape"); + add_switch(arser, "--substitute_strided_slice_to_reshape", + "This will convert certain condition Strided_Slice to Reshape"); + add_switch(arser, "--substitute_transpose_to_reshape", + "This will convert single input Transpose to Reshape"); + add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs"); + add_switch(arser, "--convert_nchw_to_nhwc", + "Experimental: This will convert NCHW operators to NHWC under the assumption that " + "input model is NCHW."); + add_switch(arser, "--nchw_to_nhwc_input_shape", + "Convert the input shape of the model (argument for --convert_nchw_to_nhwc)."); + add_switch(arser, "--nchw_to_nhwc_output_shape", + "Convert the output shape of the model (argument for --convert_nchw_to_nhwc)."); + add_switch(arser, "--transform_min_max_to_relu6", + "Transform Minimum(6)-Maximum(0) pattern to Relu6 operator"); + add_switch(arser, "--transform_min_relu_to_relu6", + "Transform Minimum(6)-Relu pattern to Relu6 operator"); + add_switch(arser, "--mute_warnings", "This will turn off warning messages"); + add_switch(arser, "--disable_validation", + "This will turn off operator validations. 
May help input model investigation."); + add_switch(arser, "--generate_profile_data", "This will turn on profiling data generation."); arser.add_argument("--change_outputs") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Experimental: Change first subgraph output nodes to CSV names"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); // sparsification argument - arser.add_argument("--sparsify_tensor") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Tensor name that you want to sparsify"); + arser.add_argument("--sparsify_tensor").help("Tensor name that you want to sparsify"); arser.add_argument("--sparsify_traversal_order") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("0,1,2,3") .help("Traversal order of dimensions. Default value: 0,1,2,3"); arser.add_argument("--sparsify_format") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("d,s") .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default " "value: d,s"); - arser.add_argument("--sparsify_block_size") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Size of each block dimension"); + arser.add_argument("--sparsify_block_size").help("Size of each block dimension"); arser.add_argument("--sparsify_block_map") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("0,1") .help("Map from block dimension to the original tensor dimension. Default value: 0,1"); @@ -446,20 +206,12 @@ int entry(int argc, char **argv) // If REPLACE is zero, it does not overwrite an existing value. 
setenv("LUCI_LOG", "100", 0); } - if (arser.get("--O1")) - { - options->enable(Algorithms::FuseBCQ); - options->enable(Algorithms::FuseInstanceNorm); - options->enable(Algorithms::ResolveCustomOpAdd); - options->enable(Algorithms::ResolveCustomOpBatchMatMul); - options->enable(Algorithms::ResolveCustomOpMatMul); - options->enable(Algorithms::RemoveRedundantTranspose); - options->enable(Algorithms::SubstitutePackToReshape); - } if (arser.get("--fold_add_v2")) options->enable(Algorithms::FoldAddV2); if (arser.get("--fold_cast")) options->enable(Algorithms::FoldCast); + if (arser.get("--fold_densify")) + options->enable(Algorithms::FoldDensify); if (arser.get("--fold_dequantize")) options->enable(Algorithms::FoldDequantize); if (arser.get("--fold_dwconv")) @@ -524,8 +276,12 @@ int entry(int argc, char **argv) options->enable(Algorithms::ResolveCustomOpMatMul); if (arser.get("--resolve_customop_max_pool_with_argmax")) options->enable(Algorithms::ResolveCustomOpMaxPoolWithArgmax); + if (arser.get("--resolve_customop_splitv")) + options->enable(Algorithms::ResolveCustomOpSplitV); if (arser.get("--shuffle_weight_to_16x1float32")) options->enable(Algorithms::ShuffleWeightTo16x1Float32); + if (arser.get("--replace_non_const_fc_with_batch_matmul")) + options->enable(Algorithms::ReplaceNonConstFCWithBatchMatMul); if (arser.get("--substitute_pack_to_reshape")) options->enable(Algorithms::SubstitutePackToReshape); if (arser.get("--substitute_padv2_to_pad")) @@ -595,37 +351,11 @@ int entry(int argc, char **argv) csv_tokenize(csv_nodes, new_outputs); } - // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector model_data; - - try - { - model_data = file_loader.load(); - } - catch (const std::runtime_error &err) - { - std::cerr << err.what() << std::endl; - return EXIT_FAILURE; - } - - flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid 
input file '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - // Import from input Circle file - luci::Importer importer; - auto module = importer.importModule(circle_model); + luci::ImporterEx importerex; + auto module = importerex.importVerifyModule(input_path); + if (module.get() == nullptr) + return EXIT_FAILURE; if (change_outputs) { diff --git a/compiler/circlechef/tools/file/Driver.cpp b/compiler/circlechef/tools/file/Driver.cpp index 76d0f3f..9c4256b 100644 --- a/compiler/circlechef/tools/file/Driver.cpp +++ b/compiler/circlechef/tools/file/Driver.cpp @@ -28,10 +28,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("recipe") - .type(arser::DataType::STR) - .help("Source recipe file path to convert"); - arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path"); + arser.add_argument("recipe").help("Source recipe file path to convert"); + arser.add_argument("circle").help("Target circle file path"); try { diff --git a/compiler/circlechef/tools/reverse/Driver.cpp b/compiler/circlechef/tools/reverse/Driver.cpp index 639e0af..c8ef07c 100644 --- a/compiler/circlechef/tools/reverse/Driver.cpp +++ b/compiler/circlechef/tools/reverse/Driver.cpp @@ -25,10 +25,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle") - .type(arser::DataType::STR) - .help("Source circle file path to convert"); - arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path"); + arser.add_argument("circle").help("Source circle file path to convert"); + arser.add_argument("recipe").help("Target recipe file path"); try { diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index b65c066..7485ff8 100644 --- 
a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circledump ${DRIVER} ${SOURCES}) target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) +target_link_libraries(circledump foder) target_link_libraries(circledump mio_circle04) target_link_libraries(circledump mio_circle04_helper) target_link_libraries(circledump safemain) diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp index 657f24f..5b0871a 100644 --- a/compiler/circledump/driver/Driver.cpp +++ b/compiler/circledump/driver/Driver.cpp @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include @@ -23,7 +23,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to dump"); + arser.add_argument("circle").help("Circle file path to dump"); try { @@ -38,14 +38,10 @@ int entry(int argc, char **argv) std::string circle_path = arser.get("circle"); // Load Circle model from a circle file - std::unique_ptr model = circleread::load_circle(circle_path); - if (model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl; - return 255; - } - - const circle::Model *circlemodel = model->model(); + foder::FileLoader fileLoader{circle_path}; + std::vector modelData = fileLoader.load(); + const circle::Model *circlemodel = circle::GetModel(modelData.data()); + // const circle::Model *circlemodel = model->model(); if (circlemodel == nullptr) { std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl; diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/circledump/include/circleread/Model.h deleted file mode 100644 index 234db8b..0000000 --- a/compiler/circledump/include/circleread/Model.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CIRCLEREAD_MODEL_H__ -#define __CIRCLEREAD_MODEL_H__ - -#include - -#include - -namespace circleread -{ - -struct Model -{ - virtual ~Model() = default; - - virtual const ::circle::Model *model(void) const = 0; -}; - -/** - * @brief Load Circle model (as a raw Model) from a given path - * - * @note May return a nullptr - */ -std::unique_ptr load_circle(const std::string &path); - -} // namespace circleread - -#endif // __CIRCLEREAD_MODEL_H__ diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake index 362d67c..183dfe2 100644 --- a/compiler/circledump/requires.cmake +++ b/compiler/circledump/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-circle04") require("safemain") diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp index 0b256dd..69427a2 100644 --- a/compiler/circledump/src/Dump.cpp +++ b/compiler/circledump/src/Dump.cpp @@ -16,8 +16,8 @@ #include #include +#include -#include "Read.h" #include "OpPrinter.h" #include "MetadataPrinter.h" @@ -122,7 +122,7 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector *fbvect) return os; } -void dump_sub_graph(std::ostream &os, circleread::Reader &reader) +void dump_sub_graph(std::ostream &os, mio::circle::Reader &reader) { auto tensors = reader.tensors(); auto operators = reader.operators(); @@ -150,14 +150,14 @@ void 
dump_sub_graph(std::ostream &os, circleread::Reader &reader) std::vector dims = {-1}; if (tensor->shape()) - dims = circleread::as_index_vector(tensor->shape()); + dims = mio::circle::as_index_vector(tensor->shape()); os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor) << " "; os << "(" << dims << ") "; if (tensor->shape_signature()) { - std::vector dims_sig = circleread::as_index_vector(tensor->shape_signature()); + std::vector dims_sig = mio::circle::as_index_vector(tensor->shape_signature()); os << "(" << dims_sig << ") "; } os << "B(" << tensor->buffer() << ") "; @@ -299,8 +299,8 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) const auto op = operators->Get(i); circle::BuiltinOperator builtincode = reader.builtin_code(op); - const std::vector &inputs = circleread::as_index_vector(op->inputs()); - const std::vector &outputs = circleread::as_index_vector(op->outputs()); + const std::vector &inputs = mio::circle::as_index_vector(op->inputs()); + const std::vector &outputs = mio::circle::as_index_vector(op->outputs()); auto op_name = reader.opcode_name(op); os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " "; @@ -356,7 +356,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) void dump_model(std::ostream &os, const circle::Model *model) { - circleread::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp deleted file mode 100644 index 67e7fa5..0000000 --- a/compiler/circledump/src/Load.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include - -namespace -{ - -class MemoryMappedModel final : public circleread::Model -{ -public: - /** - * @require fd and data SHOULD be valid - */ - explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size} - { - // DO NOTHING - } - -public: - ~MemoryMappedModel() - { - munmap(_data, _size); - close(_fd); - } - -public: - MemoryMappedModel(const MemoryMappedModel &) = delete; - MemoryMappedModel(MemoryMappedModel &&) = delete; - -public: - const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); } - -private: - int _fd = -1; - void *_data = nullptr; - size_t _size = 0; -}; - -class FileDescriptor final -{ -public: - FileDescriptor(int value) : _value{value} - { - // DO NOTHING - } - -public: - // NOTE Copy is not allowed - FileDescriptor(const FileDescriptor &) = delete; - -public: - // NOTE Move is allowed - FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); } - -public: - ~FileDescriptor() - { - if (_value != -1) - { - // Close on destructor - close(_value); - } - } - -public: - int value(void) const { return _value; } - -public: - int release(void) - { - auto res = _value; - _value = -1; - return res; - } - -private: - int _value = -1; -}; - -} // namespace - -namespace circleread -{ - -std::unique_ptr load_circle(const std::string &path) -{ - FileDescriptor fd = open(path.c_str(), O_RDONLY); - - if (fd.value() == -1) - { - // Return nullptr on open failure - return nullptr; - } - - struct stat st; - if 
(fstat(fd.value(), &st) == -1) - { - // Return nullptr on fstat failure - return nullptr; - } - - auto size = st.st_size; - auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0); - - if (data == MAP_FAILED) - { - // Return nullptr on mmap failure - return nullptr; - } - - return std::unique_ptr{new MemoryMappedModel(fd.release(), data, size)}; -} - -} // namespace circleread diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp index 02e5c26..817371d 100644 --- a/compiler/circledump/src/OpPrinter.cpp +++ b/compiler/circledump/src/OpPrinter.cpp @@ -15,7 +15,8 @@ */ #include "OpPrinter.h" -#include "Read.h" + +#include #include @@ -233,7 +234,7 @@ public: { if (auto *reshape_params = op->builtin_options_as_ReshapeOptions()) { - auto new_shape = circleread::as_index_vector(reshape_params->new_shape()); + auto new_shape = mio::circle::as_index_vector(reshape_params->new_shape()); os << " "; os << "NewShape(" << new_shape << ")"; os << std::endl; @@ -802,6 +803,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for CEIL _op_map[circle::BuiltinOperator_CONCATENATION] = make_unique(); _op_map[circle::BuiltinOperator_CONV_2D] = make_unique(); + // There is no Option for DENSIFY _op_map[circle::BuiltinOperator_DEPTH_TO_SPACE] = make_unique(); _op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique(); // There is no Option for DEQUANTIZE diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp deleted file mode 100644 index 3a7e98c..0000000 --- a/compiler/circledump/src/Read.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Read.h" - -#include - -#include -#include - -namespace circleread -{ - -Reader::Reader(const circle::Model *model) -{ - _version = model->version(); - _subgraphs = model->subgraphs(); - _buffers = model->buffers(); - _metadata = model->metadata(); - _signature_defs = model->signature_defs(); - - auto opcodes = model->operator_codes(); - for (const ::circle::OperatorCode *opcode : *opcodes) - { - _op_codes.push_back(opcode); - } -} - -size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) -{ - *buff_data = nullptr; - - if (buf_idx == 0) - return 0; - - if (auto *buffer = (*_buffers)[buf_idx]) - { - if (auto *array = buffer->data()) - { - if (size_t size = array->size()) - { - *buff_data = reinterpret_cast(array->data()); - return size; - } - } - } - - return 0; -} - -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - return opcode->builtin_code(); -} - -std::string Reader::opcode_name(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - if (!mio::circle::is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } - - return mio::circle::opcode_name(opcode); -} - -bool Reader::select_subgraph(uint32_t sgindex) -{ - _subgraph_index = sgindex; - _tensors = nullptr; - _operators = 
nullptr; - - _inputs.clear(); - _outputs.clear(); - - if (_subgraphs->Length() <= sgindex) - { - assert(false); - return false; - } - - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; - - auto name = subgraph->name(); - _subgraph_name = name ? name->c_str() : "(noname)"; - - _tensors = subgraph->tensors(); - _operators = subgraph->operators(); - _data_format = subgraph->data_format(); - - _inputs = as_index_vector(subgraph->inputs()); - _outputs = as_index_vector(subgraph->outputs()); - - return true; -} - -} // namespace circleread diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h deleted file mode 100644 index 05b0e50..0000000 --- a/compiler/circledump/src/Read.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CIRCLEREAD_READ_H__ -#define __CIRCLEREAD_READ_H__ - -#include - -#include -#include -#include - -namespace circleread -{ - -template std::vector as_index_vector(const flatbuffers::Vector *flat_array) -{ - if (flat_array == nullptr) - { - throw std::runtime_error("flat array is nullptr"); - } - - std::vector ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} - -/** - * @brief Loads Circle file and provides helpers to access attributes - */ -class Reader -{ -private: - using CircleSubGraphs_t = flatbuffers::Vector>; - using CircleBuffers_t = flatbuffers::Vector>; - using CircleTensors_t = flatbuffers::Vector>; - using CircleOperators_t = flatbuffers::Vector>; - using CircleMetadata_t = flatbuffers::Vector>; - using CircleSignatureDef_t = flatbuffers::Vector>; - -public: - Reader(const circle::Model *model); - - Reader() = delete; - -public: - uint32_t version() const { return _version; } - - const std::vector &opcodes() { return _op_codes; } - const CircleBuffers_t *buffers() { return _buffers; } - const CircleTensors_t *tensors() { return _tensors; } - const CircleOperators_t *operators() { return _operators; } - const std::vector &inputs() const { return _inputs; } - const std::vector &outputs() const { return _outputs; } - const circle::DataFormat &data_format() const { return _data_format; } - const CircleMetadata_t *metadata() const { return _metadata; } - const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } - - uint32_t num_subgraph() const { return _subgraphs->Length(); } - - size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; - -public: - bool select_subgraph(uint32_t subgraph); - const std::string &subgraph_name(void) const { return _subgraph_name; } - uint32_t subgraph_index(void) 
const { return _subgraph_index; } - -private: - uint32_t _version; - - const CircleSubGraphs_t *_subgraphs{nullptr}; - const CircleBuffers_t *_buffers{nullptr}; - const CircleTensors_t *_tensors{nullptr}; - const CircleOperators_t *_operators{nullptr}; - const CircleMetadata_t *_metadata{nullptr}; - const CircleSignatureDef_t *_signature_defs{nullptr}; - - uint32_t _subgraph_index = 0; - std::string _subgraph_name; - std::vector _op_codes; - std::vector _inputs; - std::vector _outputs; - circle::DataFormat _data_format = circle::DataFormat::DataFormat_CHANNELS_FIRST; -}; - -} // namespace circleread - -#endif // __CIRCLEREAD_READ_H__ diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt index 0fb99dd..4ab0ea2 100644 --- a/compiler/cli/CMakeLists.txt +++ b/compiler/cli/CMakeLists.txt @@ -10,5 +10,5 @@ endif(NOT ENABLE_TEST) nnas_find_package(GTest QUIET) -GTest_AddTEst(cli_test ${TESTS}) +GTest_AddTest(cli_test ${TESTS}) target_link_libraries(cli_test cli) diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp index 420cf6f..0db7894 100644 --- a/compiler/coco/core/src/IR/Module.cpp +++ b/compiler/coco/core/src/IR/Module.cpp @@ -144,7 +144,7 @@ std::unique_ptr Module::create(void) m->_input = make_unique(); m->_output = make_unique(); - return std::move(m); + return m; } } // namespace coco diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp index 5ab7069..361dcc2 100644 --- a/compiler/coco/generic/src/IR/Data.cpp +++ b/compiler/coco/generic/src/IR/Data.cpp @@ -209,8 +209,7 @@ std::unique_ptr Data::create(void) data->_blob = std::move(blob); data->_fp32 = std::move(fp32); - // GCC 4.9 tries to copy data (while GCC 6.X doesn't) - return std::move(data); + return data; } } // namespace coco diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt index 404149c..34a3a4d 100644 --- a/compiler/common-artifacts/CMakeLists.txt +++ 
b/compiler/common-artifacts/CMakeLists.txt @@ -12,14 +12,6 @@ if(${PYTHON_VERSION_MINOR} LESS 8) return() endif() -# Create python virtual environment with tensorflow 2.6.0 -set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0") - -add_custom_command( - OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0} - COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0} -) - # Create python virtual environment with tensorflow 2.8.0 set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0") @@ -30,33 +22,36 @@ add_custom_command( # Create requirements.txt and install required pip packages set(REQUIREMENTS_FILE "requirements.txt") -set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}") set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}") -add_custom_command( - OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade - DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} -) +set(PYTHON_OVERLAY python3) +if(PYTHON_EXECUTABLE MATCHES python3.8) + set(PYTHON_OVERLAY python3.8) +endif() +# NOTE when using behind proxy with self signed certificate, need to set '--trusted-host' options +set(PIP_OPTION_TRUSTED_HOST ) +if(DEFINED ENV{ONE_PIP_OPTION_TRUST_HOST}) + set(PIP_OPTION_TRUSTED_HOST --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org) +endif() + +# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051 +# TODO remove 
protobuf==3.20.1 when issue is resolved add_custom_command( OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools - COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade + COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000 + ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000 + ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0} ) add_custom_target(common_artifacts_python_deps ALL - DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} - ${VIRTUALENV_OVERLAY_TF_2_8_0} - ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0} ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} ) @@ -246,7 +241,13 @@ foreach(RECIPE IN ITEMS ${RECIPES}) if(NOT DEFINED NO_OPTIMIZE_${RECIPE}) # Generate optimized .circle add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH} - COMMAND $ --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH} + # NOTE --resolve_customop_add is just to added for old -O1, no particular meaning + # --fold_dequantize is added to fold Tensor(FLOAT16) + DEQUANTIZE (Net_Dequantize_Add) + # model. FLOAT16 in general is NOT supported but only Tensor(FLOAT16) + DEQUANTIZE + # sequence accepted as folded to Tensor(FLOAT32). 
+ # TODO revise giving options from the list file + COMMAND $ --resolve_customop_add --fold_dequantize --fold_densify + ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH} DEPENDS $ ${CIRCLE_OUTPUT_PATH} COMMENT "Generate ${OPT_CIRCLE_FILE}" ) diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index 92b07fd..2275a42 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -32,6 +32,7 @@ tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator tcgenerate(Ceil_000) tcgenerate(Conv2D_003) # runtime doesn't support dilation tcgenerate(Cos_000) +tcgenerate(Densify_000) # luci-interpreter doesn't support tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet @@ -67,6 +68,8 @@ tcgenerate(Neg_000) tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet tcgenerate(Net_Dangle_001) +tcgenerate(Net_Densify_Add_000) # luci-interpreter doesn't support Densify yet +tcgenerate(Net_Densify_Dequantize_Add_000) # luci-interpreter doesn't support Densify/Dequantize yet tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim tcgenerate(OneHot_000) diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp index 33cecbb..7481050 100644 --- a/compiler/common-artifacts/src/TestDataGenerator.cpp +++ b/compiler/common-artifacts/src/TestDataGenerator.cpp @@ -142,23 +142,15 @@ void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t int entry(int argc, char **argv) { arser::Arser arser; - 
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test"); - arser.add_argument("--input_data") - .required(true) - .nargs(1) - .type(arser::DataType::STR) - .help("Path to generate input data h5 file"); + arser.add_argument("circle").help("Circle file you want to test"); + arser.add_argument("--input_data").required(true).help("Path to generate input data h5 file"); arser.add_argument("--expected_data") .required(true) - .nargs(1) - .type(arser::DataType::STR) .help("Path to generate expected data h5 file"); arser.add_argument("--fixed_seed") - .required(false) .nargs(0) .help("Put a fixed seed into the random number generator"); arser.add_argument("--input_range") - .required(false) .nargs(3) .type(arser::DataType::STR_VEC) .help("Set random number range [min max] for the input as 'name min max'"); diff --git a/compiler/crew/CMakeLists.txt b/compiler/crew/CMakeLists.txt index 1824d86..45cda75 100644 --- a/compiler/crew/CMakeLists.txt +++ b/compiler/crew/CMakeLists.txt @@ -12,9 +12,12 @@ if(NOT ENABLE_TEST) return() endif(NOT ENABLE_TEST) +configure_file("src/test_read_semicolon.ini" "test_read_semicolon.ini" COPYONLY) + nnas_find_package(GTest REQUIRED) GTest_AddTest(crew_test ${TESTS}) target_include_directories(crew_test PRIVATE src) target_link_libraries(crew_test nncc_common) target_link_libraries(crew_test crew) +target_link_libraries(crew_test foder) diff --git a/compiler/crew/src/PConfigIni.cpp b/compiler/crew/src/PConfigIni.cpp index f0e3e8e..5177843 100644 --- a/compiler/crew/src/PConfigIni.cpp +++ b/compiler/crew/src/PConfigIni.cpp @@ -26,10 +26,36 @@ #include #include #include +#include namespace crew { +namespace +{ + +std::string filter_escape(const std::string &source) +{ + std::string key = source; + + // if key is surrounded with quotation + // TODO for quotation + + // if key has '\\' + ';', remove '\\' + auto pos = key.find("\\;"); + while (pos != std::string::npos) + { + auto k1 = key.substr(0, pos); + auto k2 
= key.substr(pos + 1); + key = k1 + k2; + pos = key.find("\\;"); + } + + return key; +} + +} // namespace + Sections read_ini(const char *data, size_t length) { assert(data != nullptr); @@ -84,6 +110,7 @@ Sections read_ini(const char *data, size_t length) { auto key = string_line.substr(0, pos); auto val = string_line.substr(pos + 1); + key = filter_escape(key); section.items.emplace(key, val); } } @@ -107,11 +134,53 @@ Sections read_ini(const std::string &path) return read_ini(ini_data.data(), ini_data.size()); } +namespace +{ + +void replace(std::string &source, const std::string &token, const std::string &replace) +{ + size_t pos = 0; + while ((pos = source.find(token, pos)) != std::string::npos) + { + source.replace(pos, token.length(), replace); + pos += replace.length(); // Handles the case where 'replace' is a substring of 'token' + } +} + +Sections insert_escape(const Sections &inputs) +{ + Sections sections; + + // for all section in sections; + // if key has ';' then replace with '\;' + for (auto &input : inputs) + { + Section section; + section.name = input.name; + + for (auto &item : input.items) + { + auto key = item.first; + auto value = item.second; + + replace(key, ";", "\\;"); + section.items[key] = value; + } + sections.push_back(section); + } + + return sections; +} + +} // namespace + void write_ini(std::ostream &os, const Sections §ions) { std::stringstream ss; - ss << sections; + auto processed = insert_escape(sections); + + ss << processed; std::string strss = ss.str(); diff --git a/compiler/crew/src/PConfigIni.test.cpp b/compiler/crew/src/PConfigIni.test.cpp index bdd2ccc..c062c69 100644 --- a/compiler/crew/src/PConfigIni.test.cpp +++ b/compiler/crew/src/PConfigIni.test.cpp @@ -17,12 +17,14 @@ #include "crew/PConfigIni.h" #include "crew/PConfigIniDump.h" +#include + #include #include #include -TEST(ConfigIniTest, read_ini_non_exist_file) +TEST(ConfigIniTest, read_ini_non_exist_file_NEG) { 
EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error); } @@ -85,3 +87,60 @@ TEST(ConfigIniTest, write_ini_file_error_NEG) crew::Sections sections; EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error); } + +TEST(ConfigIniTest, read_file_escape_semicolon) +{ + auto sections = crew::read_ini("test_read_semicolon.ini"); + ASSERT_EQ(1UL, sections.size()); + + auto its = sections.begin(); + ASSERT_NE(sections.end(), its); + EXPECT_TRUE("hello" == its->name); + ASSERT_EQ(1UL, its->items.size()); + + auto it = its->items.begin(); + ASSERT_NE(its->items.end(), it); + + EXPECT_TRUE("keya;keyb;keyc;keyd" == it->first); + EXPECT_TRUE("world" == it->second); +} + +TEST(ConfigIniTest, write_file_escape_semicolon) +{ + std::string path("test_write_semicolon.ini"); + + // save key with ';' + { + crew::Sections sections; + crew::Section hello; + hello.name = "hello"; + hello.items["keya;keyb;keyc;keyd"] = "world"; + sections.push_back(hello); + crew::write_ini(path, sections); + } + + // load the file and check if there is '\\' + std::string strbuffer; + { + foder::FileLoader file_loader{path}; + auto ini_data = file_loader.load(); + + auto buffer = std::vector(); + auto length = ini_data.size(); + buffer.reserve(length + 1); + + char *pbuffer = buffer.data(); + memcpy(pbuffer, ini_data.data(), length); + *(pbuffer + length) = 0; + + strbuffer = pbuffer; + } + int32_t count = 0; + size_t pos = 0; + while ((pos = strbuffer.find("\\;", pos)) != std::string::npos) + { + count++; + pos++; + } + EXPECT_TRUE(count == 3); +} diff --git a/compiler/crew/src/test_read_semicolon.ini b/compiler/crew/src/test_read_semicolon.ini new file mode 100644 index 0000000..d966fb7 --- /dev/null +++ b/compiler/crew/src/test_read_semicolon.ini @@ -0,0 +1,2 @@ +[hello] +keya\;keyb\;keyc\;keyd=world diff --git a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp index 7f94562..63baf0b 100644 --- 
a/compiler/enco/core/src/CppGen/Host.cpp +++ b/compiler/enco/core/src/CppGen/Host.cpp @@ -299,7 +299,7 @@ std::unique_ptr HostBlockCompiler::compile(const coco::Block res->append(ins->accept(prn)); } - return std::move(res); + return res; } } // namespace enco diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp index 599b079..3fc14ed 100644 --- a/compiler/enco/core/src/CppGen/Subnet.cpp +++ b/compiler/enco/core/src/CppGen/Subnet.cpp @@ -373,7 +373,7 @@ std::unique_ptr SubnetStructBuilder::build(const ANNBinder *binder // Finalize compilation res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");"); - return std::move(res); + return res; } std::unique_ptr SubnetBlockCompiler::compile(const ANNBinder *binder) const @@ -415,7 +415,7 @@ std::unique_ptr SubnetBlockCompiler::compile(const ANNBinder res->append("ANeuralNetworksExecution_free(execution);"); - return std::move(res); + return res; } } // namespace enco diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp index 714c27a..4bb21b0 100644 --- a/compiler/enco/core/src/Transforms/Split.cpp +++ b/compiler/enco/core/src/Transforms/Split.cpp @@ -656,7 +656,7 @@ public: app->ofm(ofm); app->ker(ker); - return std::move(app); + return app; } else { @@ -676,7 +676,7 @@ public: app->ofm(ofm); app->ker(ker); - return std::move(app); + return app; } } } @@ -704,7 +704,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asMul()) @@ -731,7 +731,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asPadF()) @@ -754,7 +754,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto maxpool = eval->op()->asMaxPool2D()) @@ -779,7 +779,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto avgpool = 
eval->op()->asAvgPool2D()) @@ -808,7 +808,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } } @@ -831,7 +831,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto relu6 = eval->op()->asReLU6()) @@ -853,7 +853,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto op = eval->op()->asConcatF()) @@ -880,7 +880,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asSub()) @@ -907,7 +907,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asDiv()) @@ -934,7 +934,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } @@ -967,7 +967,7 @@ std::unique_ptr make_appender(coco::Instr *ins) app->left(depth_concat->fst()->asFeature()); app->right(depth_concat->snd()->asFeature()); - return std::move(app); + return app; } // Build ANN IR from ANNConv2D instruction @@ -986,7 +986,7 @@ std::unique_ptr make_appender(coco::Instr *ins) app->ker(conv2d->ker()->asKernel()); app->bias(coco::safe_cast(conv2d->bias())); - return std::move(app); + return app; } return nullptr; diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp index aa2cad7..32ad443 100644 --- a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp +++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace exo { diff --git a/compiler/kuma/src/IntervalSet.h b/compiler/kuma/src/IntervalSet.h index 3b6c5f6..1e26581 100644 --- a/compiler/kuma/src/IntervalSet.h +++ b/compiler/kuma/src/IntervalSet.h @@ -17,6 +17,7 @@ #ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__ #define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__ +#include #include namespace kuma diff --git 
a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h index 1f78c9f..6be46c3 100644 --- a/compiler/loco/include/loco/IR/DataTypeTraits.h +++ b/compiler/loco/include/loco/IR/DataTypeTraits.h @@ -83,6 +83,13 @@ template <> struct DataTypeImpl using Type = uint64_t; }; +template <> struct DataTypeImpl +{ + // float16 type with 16bit value, encoded with help of FP16 library + // https://github.com/Maratyszcza/FP16/ + using Type = uint16_t; +}; + template <> struct DataTypeImpl { // Use C++ float type for IEEE 32-bit floating-point numbers @@ -132,6 +139,8 @@ inline uint32_t size(DataType data_type) return sizeof(DataTypeImpl::Type); case DataType::U64: return sizeof(DataTypeImpl::Type); + case DataType::FLOAT16: + return sizeof(DataTypeImpl::Type); case DataType::FLOAT32: return sizeof(DataTypeImpl::Type); case DataType::FLOAT64: diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp index 500f086..40ddb13 100644 --- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp +++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp @@ -122,9 +122,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) { using namespace loco; - auto encoder = encode_node->encoder(); - assert(encoder != nullptr); - auto decode_node = dynamic_cast(encode_node->input()); if (decode_node == nullptr) { @@ -132,6 +129,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) } assert(decode_node->input() != nullptr); + auto encoder = encode_node->encoder(); + assert(encoder != nullptr); + auto decoder = decode_node->decoder(); assert(decoder != nullptr); @@ -302,9 +302,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) { using namespace loco; - auto encoder = encode_node->encoder(); - assert(encoder != nullptr); - auto decode_node = dynamic_cast(encode_node->input()); if (decode_node == nullptr) { @@ -312,6 +309,9 @@ bool 
SimplifyDomainConversionPass::run(loco::Graph *g) } assert(decode_node->input() != nullptr); + auto encoder = encode_node->encoder(); + assert(encoder != nullptr); + auto decoder = decode_node->decoder(); assert(decoder != nullptr); diff --git a/compiler/luci-eval-driver/src/EvalDriver.cpp b/compiler/luci-eval-driver/src/EvalDriver.cpp index 4762cff..0ed3543 100644 --- a/compiler/luci-eval-driver/src/EvalDriver.cpp +++ b/compiler/luci-eval-driver/src/EvalDriver.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include #include @@ -47,18 +47,6 @@ void writeDataToFile(const std::string &filename, const char *data, size_t data_ } } -std::unique_ptr importModel(const std::string &filename) -{ - std::ifstream fs(filename, std::ifstream::binary); - if (fs.fail()) - { - throw std::runtime_error("Cannot open model file \"" + filename + "\".\n"); - } - std::vector model_data((std::istreambuf_iterator(fs)), - std::istreambuf_iterator()); - return luci::Importer().importModule(circle::GetModel(model_data.data())); -} - template size_t getTensorSize(const NodeT *node) { uint32_t tensor_size = loco::size(node->dtype()); @@ -91,7 +79,8 @@ int entry(int argc, char **argv) const char *output_file = argv[4]; // Load model from the file - std::unique_ptr module = importModel(filename); + luci::ImporterEx importer; + std::unique_ptr module = importer.importVerifyModule(filename); if (module == nullptr) { std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl; diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst index d134a6b..f0df58d 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -12,6 +12,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) 
REGISTER_KERNEL(Equal) @@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) REGISTER_KERNEL(SpaceToDepth) diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h index 15ff032..efa6b16 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h @@ -18,7 +18,7 @@ #define LUCI_INTERPRETER_PAL_DEQUANTIZE_H #include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h index 6046789..effb85d 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h @@ -17,7 +17,7 @@ #ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H #define LUCI_INTERPRETER_PAL_QUANTIZE_H -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h new file mode 100644 index 0000000..813b1ec --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h @@ -0,0 +1,1568 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "third_party/eigen3/Eigen/Core" +#include "fixedpoint/fixedpoint.h" +#include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/add.h" +#include "tensorflow/lite/kernels/internal/reference/add_n.h" +#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h" +#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h" +#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h" +#include "tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "tensorflow/lite/kernels/internal/reference/cast.h" +#include "tensorflow/lite/kernels/internal/reference/ceil.h" +#include "tensorflow/lite/kernels/internal/reference/comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/concatenation.h" +#include "tensorflow/lite/kernels/internal/reference/conv.h" +#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h" +#include "tensorflow/lite/kernels/internal/reference/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/div.h" +#include "tensorflow/lite/kernels/internal/reference/elu.h" +#include "tensorflow/lite/kernels/internal/reference/exp.h" +#include "tensorflow/lite/kernels/internal/reference/fill.h" +#include "tensorflow/lite/kernels/internal/reference/floor.h" +#include "tensorflow/lite/kernels/internal/reference/floor_div.h" +#include "tensorflow/lite/kernels/internal/reference/floor_mod.h" +#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include 
"tensorflow/lite/kernels/internal/reference/gather.h" +#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" +#include "tensorflow/lite/kernels/internal/reference/l2normalization.h" +#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" +#include "tensorflow/lite/kernels/internal/reference/log_softmax.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/neg.h" +#include "tensorflow/lite/kernels/internal/reference/pad.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/prelu.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/reference/quantize.h" +#include "tensorflow/lite/kernels/internal/reference/reduce.h" +#include "tensorflow/lite/kernels/internal/reference/requantize.h" +#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h" +#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" +#include "tensorflow/lite/kernels/internal/reference/round.h" +#include "tensorflow/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h" +#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" +#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/sub.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/reference/transpose.h" +#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h" +#include "tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include 
"tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/internal/types.h" +namespace tflite +{ + +namespace reference_ops +{ + +template +inline void Relu(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T lower = 0; + const T clamped = val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template +inline void Relu1(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Relu1 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T upper = 1; + const T lower = -1; + const T clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +inline void Relu6(const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("Relu6 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const float val = input_data[i]; + const float upper = 6; + const float lower = 0; + const float clamped = val > upper ? upper : val < lower ? 
lower : val; + output_data[i] = clamped; + } +} + +template +inline void ReluX(const tflite::ReluParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const int32 val = static_cast(input_data[i]); + int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset, + params.output_multiplier, + params.output_shift); + clamped = std::max(params.quantized_activation_min, clamped); + clamped = std::min(params.quantized_activation_max, clamped); + output_data[i] = static_cast(clamped); + } +} + +template +inline void ReluX(const tflite::ActivationParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + const T max_value = params.quantized_activation_max; + const T min_value = params.quantized_activation_min; + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T clamped = val > max_value ? max_value : val < min_value ? min_value : val; + output_data[i] = clamped; + } +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. 
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params, + const RuntimeShape &unswitched_input1_shape, + const uint8 *unswitched_input1_data, + const RuntimeShape &unswitched_input2_shape, + const uint8 *unswitched_input2_data, + const RuntimeShape &output_shape, uint8 *output_data) +{ + ArithmeticParams switched_params = unswitched_params; + switched_params.input1_offset = unswitched_params.input2_offset; + switched_params.input2_offset = unswitched_params.input1_offset; + + const bool use_unswitched = unswitched_params.broadcast_category == + tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; + + const ArithmeticParams ¶ms = use_unswitched ? unswitched_params : switched_params; + const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data; + const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data; + + // Fivefold nested loops. The second input resets its position for each + // iteration of the second loop. The first input resets its position at the + // beginning of the fourth loop. The innermost loop is an elementwise Mul of + // sections of the arrays. 
+ uint8 *output_data_ptr = output_data; + const uint8 *input1_data_ptr = input1_data; + const uint8 *input2_data_reset = input2_data; + int y0 = params.broadcast_shape[0]; + int y1 = params.broadcast_shape[1]; + int y2 = params.broadcast_shape[2]; + int y3 = params.broadcast_shape[3]; + int y4 = params.broadcast_shape[4]; + for (int i0 = 0; i0 < y0; ++i0) + { + const uint8 *input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) + { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) + { + for (int i3 = 0; i3 < y3; ++i3) + { + MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr); + input2_data_ptr += y4; + output_data_ptr += y4; + } + input1_data_ptr += y4; + } + } + input2_data_reset = input2_data_ptr; + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16"); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16Uint8"); + int32 output_offset = params.output_offset; + int32 output_activation_min = params.quantized_activation_min; + int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = std::min(output_activation_max - output_offset, rescaled_result); + clamped_result = std::max(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + +inline void Sub16(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16_t *input1_data, const RuntimeShape &input2_shape, + const int16_t *input2_data, const RuntimeShape &output_shape, + int16_t *output_data) +{ + ruy::profiler::ScopeLabel label("Sub/Int16"); + const int input1_shift = params.input1_shift; + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + const int16 output_activation_min = params.quantized_activation_min; + const int16 output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); + TFLITE_DCHECK_LE(input1_shift, 0); + 
TFLITE_DCHECK_LE(params.input2_shift, 0); + const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift; + + if (input1_shift == 0) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(input_ready_scaled, scaled_input); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } + else + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(scaled_input, input_ready_scaled); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } +} + +template +void Pack(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Pack"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + int inputs_count = params.inputs_count; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = params.axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + 
TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + for (int i = 0; i < inputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + const Scalar *input_ptr = input_data[i] + copy_size * k; + int loc = k * inputs_count * copy_size + i * copy_size; + memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar)); + } + } +} + +template +void Unpack(const UnpackParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *const *output_datas) +{ + ruy::profiler::ScopeLabel label("Unpack"); + const int dimensions = input_shape.DimensionsCount(); + const int outputs_count = params.num_split; + + int outer_size = 1; + int axis = params.axis; + if (axis < 0) + { + axis += dimensions; + } + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, dimensions); + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) + { + copy_size *= input_shape.Dims(i); + } + TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); + + for (int i = 0; i < outputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + Scalar *output_ptr = output_datas[i] + copy_size * k; + int loc = k * outputs_count * copy_size + i * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template +void PackWithScaling(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const uint8 *const *input_data, const RuntimeShape &output_shape, + uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("PackWithScaling"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + const int32 *input_zeropoint = params.input_zeropoint; + const float *input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32 output_zeropoint = params.output_zeropoint; + const float output_scale = params.output_scale; + + int outer_size = 1; + for 
(int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + Scalar *output_ptr = output_data; + const float inverse_output_scale = 1.f / output_scale; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale) + { + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + } + else + { + assert(false); + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + auto input_ptr = input_data[i]; + for (int j = 0; j < copy_size; ++j) + { + const int value = + static_cast(std::round(input_ptr[j] * scale + bias)) + output_zeropoint; + output_ptr[j] = static_cast(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + +template +void DepthConcatenation(const ConcatenationParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, + Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("DepthConcatenation"); + auto params_copy = params; + params_copy.axis = 3; + Concatenation(params_copy, input_shapes, input_data, output_shape, output_data); +} + +inline void LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const float *input_data, const RuntimeShape &unextended_prev_activ_shape, + const float *prev_activ_data, const RuntimeShape &weights_shape, + const float *weights_data, const RuntimeShape &unextended_bias_shape, + const float *bias_data, const RuntimeShape &unextended_prev_state_shape, + const float *prev_state_data, + const RuntimeShape &unextended_output_state_shape, float *output_state_data, + const RuntimeShape &unextended_output_activ_shape, float 
*output_activ_data, + const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, + const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data) +{ + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + const int width = MatchingDim(input_shape, 2, 
prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector concat_input_arrays_data; + std::vector concat_input_arrays_shapes; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_shapes.push_back(&input_shape); + concat_input_arrays_shapes.push_back(&prev_activ_shape); + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = concat_input_arrays_data.size(); + Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]), + concat_temp_shape, concat_temp_data); + + // Fully connected + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits::lowest(); + fc_params.float_activation_max = std::numeric_limits::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data, + bias_shape, bias_data, activ_temp_shape, activ_temp_data); + + // Memory state update (the LSTM "guts") + for (int b = 0; b < batches; ++b) + { + for (int w = 0; w < width; ++w) + { + for (int h = 0; h < height; ++h) + { + for (int c = 0; c < output_depth; ++c) + { + 
const float input_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)])); + const float new_input = + std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]); + const float forget_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)])); + const float output_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)])); + const float new_state = + input_gate * new_input + + forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)]; + output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; + output_activ_data[Offset(output_activ_shape, b, h, w, c)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +// Quantized LSTM cell implementation. +// The quantization of the input, output arrays is as follows: +// - The input activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that is the natural interval for output +// activations (see next point) and these need to be concatenated together. +// We could accommodate different ranges by re-scaling, but we empirically +// found that setting the input activations range to be [-1, 127/128] in the +// first place, removing the need for re-scaling, greatly improves accuracy. +// - The output activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that the definition of a LSTM cell makes them +// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] +// makes for simpler, more accurate fixed-point arithmetic. +// - The output-at-previous-timestep state array is obviously quantized as +// the output activations. +// - The internal LSTM memory (not the output-at-previous-timestep, the other +// internal state array) is int16-quantized and may use any power-of-two, +// symmetric range i.e. 
[-2^N, 2^N * 32767/32768] for any N, which we call +// StateIntegerBits below, see the below discussion of that template +// parameter ("The StateIntegerBits template parameter"). +// - The output of the internal fully-connected node is int16-quantized +// on the interval [-8, 8 * 32767/32768], the rationale for which is +// explained just below ("Why [-8, 8] for fully-connected output?"). +// +// +// === The StateIntegerBits template parameter === +// +// The StateIntegerBits template parameter controls the fixed-point format used +// to represent the internal memory of the LSTM cell (not the +// output-at-previous-timestep, the other internal state array). It's currently +// a template parameter so that the model can control that. The most typical +// value for StateIntegerBits is 4. Other plausible values are anywhere between +// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, +// and drop that template parameter. The reason why it can't be a runtime +// parameter is that this controls the fixed-point format used, i.e. we need to +// generate actually different code based on it. In particular, we generate code +// for a fixed-point tanh() implementation for that format, which internally +// uses a fixed-point exp() implementation, which internally uses a +// barrel-shifter with a number of steps that depends on StateIntegerBits. +// Another consequence of that is that a higher value of StateIntegerBits +// results in a more expensive implementation (more barrel shifter steps +// needed). +// +// +// === Why [-8, 8] for fully-connected output? === +// +// This array is only fed to Logistic and Tanh functions, for which +// the quantized implementation will want to use fixed-point arithmetic, +// requiring a power-of-two representation interval. 
Thus, we should right +// away quantize this array to a power-of-two interval; otherwise, +// implementation will need to rescale that, losing any benefit that a tighter +// representation interval might otherwise yield, while introducing some +// numerical error and computational overhead. +// +// Now, Logistic and Tanh +// are nearly constant (nearly equal to their horizontal asymptotes) +// outside of a small bounded interval around 0: +// +// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 +// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 +// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 +// +// From this, we see that clamping to [-4, 4] would be too inaccurate +// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) +// while clamping to [-16, 16] would make no difference even in float32. +// However, for a fixed-point implementation in 16-bit integers, using 5 +// integer bits to represent the [-16, 16] range would leave only 11 +// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive +// representable values. Notice that is higher than the +// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. +// Using [-8, 8] thus seems like the better compromise overall, enjoying +// an increment of 2.4e-4 between representable values and a worst-case +// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with +// [-16, 16]. +// +// Moreover, all other things being equal, it is nice to choose the narrower +// representation range, as that makes the implementation of fixed-point +// math functions a little cheaper (each integer bit requires an additional +// barrel-shifter step in the implementation of exp(-x)). That is further +// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make +// sense for 32-bit float or 32-bit fixed-point quantization, but we are +// aiming for 16-bit fixed-point quantization of these internal nodes here.
+// +template +inline void +LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, + const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, + const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, + const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, + const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, + int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, + uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, + uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, + int16 *activ_temp_data_int16, void *gemmlowp_context) +{ + (void)gemmlowp_context; // only used in optimized code. + int32 weights_zero_point = params.weights_zero_point; + int32 accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape 
output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + // Gather dimensions information, and perform consistency checks. + const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape, + output_state_shape, output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); + const int fc_output_depth = + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); + + // Depth-concatenate prev_activ and input data together. 
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8}; + const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data, + concat_temp_shape, concat_temp_data_uint8); + + // Implementation of the fully connected node inside the LSTM cell. + // The operands are 8-bit integers, the accumulators are internally 32bit + // integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. + for (int b = 0; b < fc_batches; ++b) + { + for (int out_c = 0; out_c < fc_output_depth; ++out_c) + { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data_int32[out_c]; + // Accumulation loop. + for (int d = 0; d < fc_accum_depth; ++d) + { + int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128; + int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; + accum += input_val * weights_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, using 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); + // Saturate, cast to int16, and store to the temporary activations array. + accum = std::max(-32768, std::min(32767, static_cast(accum))); + activ_temp_data_int16[out_c + fc_output_depth * b] = accum; + } + } + + // Rest of the LSTM cell: tanh and logistic math functions, and some adds + // and muls, all done in 16-bit fixed-point. 
+ for (int b = 0; b < outer_size; ++b) + { + for (int c = 0; c < output_depth; ++c) + { + // Define the fixed-point data types that we will use here. All use + // int16 as the underlying integer type i.e. all are 16-bit fixed-point. + // They only differ by the number of integral vs. fractional bits, + // determining the range of values that they can represent. + // + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8]. + // This is the range of the previous fully-connected node's output, + // which is our input here. + using F3 = gemmlowp::FixedPoint; + // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, + // 2^StateIntegerBits]. It's used to represent the internal state, whose + // number of integer bits is currently dictated by the model. See comment + // on the StateIntegerBits template parameter above. + using FS = gemmlowp::FixedPoint; + // Implementation of input gate, using fixed-point logistic function. + F3 input_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); + F0 input_gate_output = gemmlowp::logistic(input_gate_input); + // Implementation of input modulation gate, using fixed-point tanh + // function. + F3 input_modulation_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); + F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input); + // Implementation of forget gate, using fixed-point logistic function. + F3 forget_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); + F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); + // Implementation of output gate, using fixed-point logistic function. 
+ F3 output_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); + F0 output_gate_output = gemmlowp::logistic(output_gate_input); + // Implementation of internal multiplication nodes, still in fixed-point. + F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output; + FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); + FS prev_state_times_forget_state = forget_gate_output * prev_state; + // Implementation of internal addition node, saturating. + FS new_state = + gemmlowp::SaturatingAdd(gemmlowp::Rescale(input_times_input_modulation), + prev_state_times_forget_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); + // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. + output_state_data_int16[b * output_depth + c] = new_state.raw(); + // Down-scale the output activations to 8-bit integers, saturating, + // and store back to memory. 
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); + int16 clamped_output_activ = + std::max(-128, std::min(127, rescaled_output_activ)); + output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ; + } + } +} + +template +void Split(const SplitParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape *const *output_shapes, Scalar *const *output_data) +{ + ruy::profiler::ScopeLabel label("Split"); + const int split_dimensions = input_shape.DimensionsCount(); + int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis; + int outputs_count = params.num_split; + TFLITE_DCHECK_LT(axis, split_dimensions); + + int64_t split_size = 0; + for (int i = 0; i < outputs_count; i++) + { + TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions); + for (int j = 0; j < split_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*output_shapes[i], j, input_shape, j); + } + } + split_size += output_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + // For all output arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < split_dimensions; ++i) + { + base_inner_size *= input_shape.Dims(i); + } + + const Scalar *input_ptr = input_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < outputs_count; ++i) + { + const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar)); + input_ptr += copy_size; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) +{ + return (b * height + h) * width + w; +} + +inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + const RuntimeShape &input_shape, 
const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + for (int c = 0; c < depth; ++c) + { + const int begin_input_c = std::max(0, static_cast(c - op_params.range)); + const int end_input_c = std::min(depth, static_cast(c + op_params.range)); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) + { + const float input_val = input_data[i * depth + input_c]; + accum += input_val * input_val; + } + const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta); + output_data[i * depth + c] = input_data[i * depth + c] * multiplier; + } + } +} + +inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; i++) + { + output_data[i] = static_cast(input_data[i]); + } +} + +inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, + const float *input_data, const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("FakeQuant"); + float rmin = op_params.minmax.min; + float rmax = op_params.minmax.max; + int num_bits = op_params.num_bits; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. 
+ int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size); +} + +// Common subroutine for both `GatherNd` and `GatherNdString`. +struct GatherNdHelperResult +{ + int n_slices; + int slice_size; + int indices_nd; + std::vector dims_to_count; +}; + +// Returns common values being used on both `GatherNd` and `GatherNdString`. +inline GatherNdHelperResult GatherNdHelper(const RuntimeShape ¶ms_shape, + const RuntimeShape &indices_shape) +{ + GatherNdHelperResult ret; + ret.n_slices = 1; + ret.slice_size = 1; + const int indices_dims = indices_shape.DimensionsCount(); + ret.indices_nd = indices_shape.Dims(indices_dims - 1); + const int params_dims = params_shape.DimensionsCount(); + for (int i = 0; i < indices_dims - 1; ++i) + { + ret.n_slices *= indices_shape.Dims(i); + } + for (int i = ret.indices_nd; i < params_dims; ++i) + { + ret.slice_size *= params_shape.Dims(i); + } + + int remain_flat_size = params_shape.FlatSize(); + ret.dims_to_count = std::vector(ret.indices_nd, 0); + for (int i = 0; i < ret.indices_nd; ++i) + { + ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i); + remain_flat_size = ret.dims_to_count[i]; + } + + return ret; +} + +template +inline void GatherNd(const RuntimeShape ¶ms_shape, const ParamsT *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, ParamsT *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNd"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * 
res.dims_to_count[j]; + } + std::memcpy(output_data + i * res.slice_size, params_data + from_pos, + sizeof(ParamsT) * res.slice_size); + } +} + +#ifndef TF_LITE_STATIC_MEMORY +template +inline void GatherNdString(const RuntimeShape ¶ms_shape, const TfLiteTensor *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, TfLiteTensor *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNdString"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + DynamicBuffer buffer; + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + for (int j = 0; j < res.slice_size; ++j) + { + buffer.AddString(GetString(params_data, from_pos + j)); + } + } + buffer.WriteToTensor(output_data, /*new_shape=*/nullptr); +} +#endif + +template +inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &updates_shape, const UpdatesT *updates_data, + const RuntimeShape &output_shape, UpdatesT *output_data) +{ + ruy::profiler::ScopeLabel label("ScatterNd"); + + int n_slices = 1; + int slice_size = 1; + const int outer_dims = indices_shape.DimensionsCount() - 1; + const int indices_nd = indices_shape.Dims(outer_dims); + const int updates_dims = updates_shape.DimensionsCount(); + for (int i = 0; i < outer_dims; ++i) + { + n_slices *= indices_shape.Dims(i); + } + for (int i = outer_dims; i < updates_dims; ++i) + { + slice_size *= updates_shape.Dims(i); + } + + int output_flat_size = output_shape.FlatSize(); + int remain_flat_size = output_flat_size; + std::vector dims_to_count(indices_nd, 0); + for (int i = 0; i < indices_nd; ++i) + { + dims_to_count[i] = remain_flat_size / output_shape.Dims(i); + remain_flat_size = dims_to_count[i]; + } + + memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); + for (int i = 0; i < 
n_slices; ++i) + { + int to_pos = 0; + for (int j = 0; j < indices_nd; ++j) + { + IndicesT idx = indices_data[i * indices_nd + j]; + TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); + to_pos += idx * dims_to_count[j]; + } + for (int j = 0; j < slice_size; j++) + { + output_data[to_pos + j] += updates_data[i * slice_size + j]; + } + } +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const RuntimeShape &output_shape, SequentialTensorWriter *writer) +{ + const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); + TFLITE_DCHECK_LE(op_params.begin_count, 5); + TFLITE_DCHECK_LE(op_params.size_count, 5); + const int begin_count = op_params.begin_count; + const int size_count = op_params.size_count; + // We front-pad the begin and size vectors. + std::array start; + std::array stop; + for (int i = 0; i < 5; ++i) + { + int padded_i = 5 - i; + start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; + stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1) + ? 
ext_shape.Dims(i) + : start[i] + op_params.size[size_count - padded_i]; + } + + for (int i0 = start[0]; i0 < stop[0]; ++i0) + { + for (int i1 = start[1]; i1 < stop[1]; ++i1) + { + for (int i2 = start[2]; i2 < stop[2]; ++i2) + { + for (int i3 = start[3]; i3 < stop[3]; ++i3) + { + for (int i4 = start[4]; i4 < stop[4]; ++i4) + { + writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); + } + } + } + } + } +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + SequentialTensorWriter writer(input_data, output_data); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output) +{ + SequentialTensorWriter writer(input, output); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template +void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto min_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. 
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto max_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. + Maximum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater()); +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, + const RuntimeShape &input2_shape, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + // Drop shape of second input: not needed. + ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + int64_t flatsize; + // Allow select operator executions on mixed scalar tensors and one element + // tensors. 
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 && + input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) + { + flatsize = 1; + } + else + { + flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); + } + for (int64_t i = 0; i < flatsize; ++i) + { + output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +template +void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int64_t outer_size = input_condition_shape.FlatSize(); + int64_t inner_size; + if (input_condition_shape.DimensionsCount() == 0) + { + inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape); + } + else + { + TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size); + inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); + } + + int64_t offset = 0; + for (int64_t i = 0; i < outer_size; i++) + { + const T *input_data = input_condition_data[i] ? 
input_x_data : input_y_data; + memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); + offset += inner_size; + } +} + +template +void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); + + const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape); + + NdArrayDesc<4> desc_condition; + NdArrayDesc<4> desc_x; + NdArrayDesc<4> desc_y; + NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape, + &desc_condition, &desc_x, &desc_y); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c); + const int x_index = SubscriptToIndex(desc_x, b, y, x, c); + const int y_index = SubscriptToIndex(desc_y, b, y, x, c); + output_data[Offset(extended_output_shape, b, y, x, c)] = + input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; + } + } + } + } +} + +template +void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data, + T *output_data) +{ + const size_t size = input_condition_shape.FlatSize(); + if (size == 0) + { + // Dimension is zero, in which case we don't need to output. + return; + } + const size_t cond_rank = input_condition_shape.DimensionsCount(); + + std::vector dims_to_count(cond_rank, 0); + int cur_flat_size = size; + for (int i = 0; i < cond_rank; ++i) + { + dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i); + cur_flat_size = dims_to_count[i]; + } + + int output_index = 0; + for (int i = 0; i < size; ++i) + { + if (input_condition_data[i]) + { + // Insert the coordinate of the current item (row major) into output. + int flat_index = i; + for (int j = 0; j < cond_rank; ++j) + { + int coord_j = flat_index / dims_to_count[j]; + output_data[output_index * cond_rank + j] = coord_j; + flat_index %= dims_to_count[j]; + } + output_index++; + } + } +} + +// For easy implementation, the indices is always a vector of size-4 vectors. 
+template +inline void SparseToDense(const std::vector> &indices, const T *values, + T default_value, bool value_is_scalar, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int value_count = indices.size(); + + // First fill the output_data with default value. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; ++i) + { + output_data[i] = default_value; + } + + // Special handle for value is scalar case to avoid checking the boolean + // condition within the loop every time. + if (value_is_scalar) + { + for (int i = 0; i < value_count; ++i) + { + const std::vector &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = *values; // just use the first value. + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } + return; + } + + // Go through the values and indices to fill the sparse values. 
+ for (int i = 0; i < value_count; ++i) + { + const std::vector &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = values[i]; + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } +} + +template +inline void Pow(const RuntimeShape &input1_shape, const T *input1_data, + const RuntimeShape &input2_shape, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = std::pow(input1_data[i], input2_data[i]); + } +} + +template +inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data, + const RuntimeShape &unextended_input2_shape, const T *input2_data, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1, + &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) + { + for (int y = 0; y < output_shape.Dims(1); ++y) + { + for (int x = 0; x < output_shape.Dims(2); ++x) + { + for (int c = 0; c < output_shape.Dims(3); ++c) + { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = std::pow(in1_val, in2_val); + } + } + } + } +} + +template +void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar 
*output_data) +{ + ruy::profiler::ScopeLabel label("Reverse"); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_axis = input_shape.Dims(axis); + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_axis; ++j) + { + const int start_pos = (i * dims_at_axis + j) * copy_size; + Scalar *output_ptr = output_data + start_pos; + int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template +void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim, + const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("ReverseSequence"); + + int outer_size = 1; + int outer_dim = std::min(batch_dim, seq_dim); + int medium_dim = std::max(batch_dim, seq_dim); + for (int i = 0; i < outer_dim; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int medium_size = 1; + for (int i = outer_dim + 1; i < medium_dim; ++i) + { + medium_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_outer_dim = input_shape.Dims(outer_dim); + const int dims_at_medium_dim = input_shape.Dims(medium_dim); + + Scalar *output_ptr; + if (batch_dim > seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + int sl 
= seq_lengths[q] - 1; + if (j > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size; + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } + else if (batch_dim < seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + int sl = seq_lengths[j] - 1; + const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + if (q > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } +} + +template +inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape); + + memset(output_data, 0, sizeof(T) * output_shape.FlatSize()); + + for (int i = 0; i < input_shape.Dims(0); i++) + { + int output_index = segment_ids_data[i]; + for (int j = 0; j < segment_flat_size; ++j) + { + output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j]; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst 
b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst index 428b15e..1e6c41e 100644 --- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -13,6 +13,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -48,6 +49,7 @@ REGISTER_KERNEL(PadV2) REGISTER_KERNEL(Pow) REGISTER_KERNEL(PRelu) REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(ReduceMax) REGISTER_KERNEL(Relu) REGISTER_KERNEL(Relu6) REGISTER_KERNEL(Reshape) @@ -55,6 +57,7 @@ REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(ReverseV2) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Slice) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) diff --git a/compiler/luci-interpreter/pal/linux/PALreference_ops.h b/compiler/luci-interpreter/pal/linux/PALreference_ops.h new file mode 100644 index 0000000..825ebfe --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALreference_ops.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst index d134a6b..f0df58d 100644 --- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -12,6 +12,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) REGISTER_KERNEL(SpaceToDepth) diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h index 15ff032..efa6b16 100644 --- a/compiler/luci-interpreter/pal/mcu/PALDequantize.h +++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h @@ -18,7 +18,7 @@ #define LUCI_INTERPRETER_PAL_DEQUANTIZE_H #include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h index 6046789..effb85d 100644 --- a/compiler/luci-interpreter/pal/mcu/PALQuantize.h +++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h @@ -17,7 +17,7 @@ #ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H #define LUCI_INTERPRETER_PAL_QUANTIZE_H -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/mcu/PALreference_ops.h 
b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h new file mode 100644 index 0000000..62c7209 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "third_party/eigen3/Eigen/Core" +#include "fixedpoint/fixedpoint.h" +#include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/add.h" +#include "tensorflow/lite/kernels/internal/reference/add_n.h" +#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h" +#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h" +#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h" +#include "tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "tensorflow/lite/kernels/internal/reference/cast.h" +#include "tensorflow/lite/kernels/internal/reference/ceil.h" +#include "tensorflow/lite/kernels/internal/reference/comparisons.h" +#include 
"tensorflow/lite/kernels/internal/reference/concatenation.h" +#include "tensorflow/lite/kernels/internal/reference/conv.h" +#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h" +#include "tensorflow/lite/kernels/internal/reference/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/div.h" +#include "tensorflow/lite/kernels/internal/reference/elu.h" +#include "tensorflow/lite/kernels/internal/reference/exp.h" +#include "tensorflow/lite/kernels/internal/reference/fill.h" +#include "tensorflow/lite/kernels/internal/reference/floor.h" +#include "tensorflow/lite/kernels/internal/reference/floor_div.h" +#include "tensorflow/lite/kernels/internal/reference/floor_mod.h" +#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/gather.h" +#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" +#include "tensorflow/lite/kernels/internal/reference/l2normalization.h" +#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" +#include "tensorflow/lite/kernels/internal/reference/log_softmax.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/neg.h" +#include "tensorflow/lite/kernels/internal/reference/pad.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/prelu.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/reference/quantize.h" +#include "tensorflow/lite/kernels/internal/reference/reduce.h" +#include "tensorflow/lite/kernels/internal/reference/requantize.h" +#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h" +#include 
"tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" +#include "tensorflow/lite/kernels/internal/reference/round.h" +#include "tensorflow/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h" +#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" +#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/sub.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/reference/transpose.h" +#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h" +#include "tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/internal/types.h" +namespace tflite +{ + +namespace reference_ops +{ + +template +inline void Relu(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T lower = 0; + const T clamped = val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template +inline void Relu1(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Relu1 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T upper = 1; + const T lower = -1; + const T clamped = val > upper ? upper : val < lower ? 
lower : val; + output_data[i] = clamped; + } +} + +inline void Relu6(const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("Relu6 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const float val = input_data[i]; + const float upper = 6; + const float lower = 0; + const float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template +inline void ReluX(const tflite::ReluParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const int32 val = static_cast(input_data[i]); + int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset, + params.output_multiplier, + params.output_shift); + clamped = std::max(params.quantized_activation_min, clamped); + clamped = std::min(params.quantized_activation_max, clamped); + output_data[i] = static_cast(clamped); + } +} + +template +inline void ReluX(const tflite::ActivationParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + const T max_value = params.quantized_activation_max; + const T min_value = params.quantized_activation_min; + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T clamped = val > max_value ? max_value : val < min_value ? 
min_value : val; + output_data[i] = clamped; + } +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params, + const RuntimeShape &unswitched_input1_shape, + const uint8 *unswitched_input1_data, + const RuntimeShape &unswitched_input2_shape, + const uint8 *unswitched_input2_data, + const RuntimeShape &output_shape, uint8 *output_data) +{ + ArithmeticParams switched_params = unswitched_params; + switched_params.input1_offset = unswitched_params.input2_offset; + switched_params.input2_offset = unswitched_params.input1_offset; + + const bool use_unswitched = unswitched_params.broadcast_category == + tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; + + const ArithmeticParams ¶ms = use_unswitched ? unswitched_params : switched_params; + const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data; + const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data; + + // Fivefold nested loops. The second input resets its position for each + // iteration of the second loop. The first input resets its position at the + // beginning of the fourth loop. The innermost loop is an elementwise Mul of + // sections of the arrays. 
+ uint8 *output_data_ptr = output_data; + const uint8 *input1_data_ptr = input1_data; + const uint8 *input2_data_reset = input2_data; + int y0 = params.broadcast_shape[0]; + int y1 = params.broadcast_shape[1]; + int y2 = params.broadcast_shape[2]; + int y3 = params.broadcast_shape[3]; + int y4 = params.broadcast_shape[4]; + for (int i0 = 0; i0 < y0; ++i0) + { + const uint8 *input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) + { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) + { + for (int i3 = 0; i3 < y3; ++i3) + { + MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr); + input2_data_ptr += y4; + output_data_ptr += y4; + } + input1_data_ptr += y4; + } + } + input2_data_reset = input2_data_ptr; + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16"); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16Uint8"); + int32 output_offset = params.output_offset; + int32 output_activation_min = params.quantized_activation_min; + int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = std::min(output_activation_max - output_offset, rescaled_result); + clamped_result = std::max(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + +inline void Sub16(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16_t *input1_data, const RuntimeShape &input2_shape, + const int16_t *input2_data, const RuntimeShape &output_shape, + int16_t *output_data) +{ + ruy::profiler::ScopeLabel label("Sub/Int16"); + const int input1_shift = params.input1_shift; + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + const int16 output_activation_min = params.quantized_activation_min; + const int16 output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); + TFLITE_DCHECK_LE(input1_shift, 0); + 
TFLITE_DCHECK_LE(params.input2_shift, 0); + const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift; + + if (input1_shift == 0) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(input_ready_scaled, scaled_input); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } + else + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(scaled_input, input_ready_scaled); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } +} + +template +void Pack(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Pack"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + int inputs_count = params.inputs_count; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = params.axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + 
TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + for (int i = 0; i < inputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + const Scalar *input_ptr = input_data[i] + copy_size * k; + int loc = k * inputs_count * copy_size + i * copy_size; + memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar)); + } + } +} + +template +void Unpack(const UnpackParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *const *output_datas) +{ + ruy::profiler::ScopeLabel label("Unpack"); + const int dimensions = input_shape.DimensionsCount(); + const int outputs_count = params.num_split; + + int outer_size = 1; + int axis = params.axis; + if (axis < 0) + { + axis += dimensions; + } + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, dimensions); + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) + { + copy_size *= input_shape.Dims(i); + } + TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); + + for (int i = 0; i < outputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + Scalar *output_ptr = output_datas[i] + copy_size * k; + int loc = k * outputs_count * copy_size + i * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template +void PackWithScaling(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const uint8 *const *input_data, const RuntimeShape &output_shape, + uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("PackWithScaling"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + const int32 *input_zeropoint = params.input_zeropoint; + const float *input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32 output_zeropoint = params.output_zeropoint; + const float output_scale = params.output_scale; + + int outer_size = 1; + for 
(int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + Scalar *output_ptr = output_data; + const float inverse_output_scale = 1.f / output_scale; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale) + { + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + } + else + { + assert(false); + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + auto input_ptr = input_data[i]; + for (int j = 0; j < copy_size; ++j) + { + const int value = + static_cast(std::round(input_ptr[j] * scale + bias)) + output_zeropoint; + output_ptr[j] = static_cast(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + +template +void DepthConcatenation(const ConcatenationParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, + Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("DepthConcatenation"); + auto params_copy = params; + params_copy.axis = 3; + Concatenation(params_copy, input_shapes, input_data, output_shape, output_data); +} + +inline void LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const float *input_data, const RuntimeShape &unextended_prev_activ_shape, + const float *prev_activ_data, const RuntimeShape &weights_shape, + const float *weights_data, const RuntimeShape &unextended_bias_shape, + const float *bias_data, const RuntimeShape &unextended_prev_state_shape, + const float *prev_state_data, + const RuntimeShape &unextended_output_state_shape, float *output_state_data, + const RuntimeShape &unextended_output_activ_shape, float 
*output_activ_data, + const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, + const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data) +{ + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + const int width = MatchingDim(input_shape, 2, 
prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector concat_input_arrays_data; + std::vector concat_input_arrays_shapes; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_shapes.push_back(&input_shape); + concat_input_arrays_shapes.push_back(&prev_activ_shape); + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = concat_input_arrays_data.size(); + Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]), + concat_temp_shape, concat_temp_data); + + // Fully connected + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits::lowest(); + fc_params.float_activation_max = std::numeric_limits::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data, + bias_shape, bias_data, activ_temp_shape, activ_temp_data); + + // Memory state update (the LSTM "guts") + for (int b = 0; b < batches; ++b) + { + for (int w = 0; w < width; ++w) + { + for (int h = 0; h < height; ++h) + { + for (int c = 0; c < output_depth; ++c) + { + 
const float input_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)])); + const float new_input = + std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]); + const float forget_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)])); + const float output_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)])); + const float new_state = + input_gate * new_input + + forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)]; + output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; + output_activ_data[Offset(output_activ_shape, b, h, w, c)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +// Quantized LSTM cell implementation. +// The quantization of the input, output arrays is as follows: +// - The input activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that is the natural interval for output +// activations (see next point) and these need to be concatenated together. +// We could accommodate different ranges by re-scaling, but we empirically +// found that setting the input activations range to be [-1, 127/128] in the +// first place, removing the need for re-scaling, greatly improves accuracy. +// - The output activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that the definition of a LSTM cell makes them +// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] +// makes for simpler, more accurate fixed-point arithmetic. +// - The output-at-previous-timestep state array is obviously quantized as +// the output activations. +// - The internal LSTM memory (not the output-at-previous-timestep, the other +// internal state array) is int16-quantized and may use any power-of-two, +// symmetric range i.e. 
[-2^N, 2^N * 32767/32768] for any N, which we call +// StateIntegerBits below, see the below discussion of that template +// parameter ("The StateIntegerBits template parameter"). +// - The output of the internal fully-connected node is int16-quantized +// on the interval [-8, 8 * 32767/32768], the rationale for which is +// explained just below ("Why [-8, 8] for fully-connected output?"). +// +// +// === The StateIntegerBits template parameter === +// +// The StateIntegerBits template parameter controls the fixed-point format used +// to represent the internal memory of the LSTM cell (not the +// output-at-previous-timestep, the other internal state array). It's currently +// a template parameter so that the model can control that. The most typical +// value for StateIntegerBits is 4. Other plausible values are anywhere between +// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, +// and drop that template parameter. The reason why it can't be a runtime +// parameter is that this controls the fixed-point format used, i.e. we need to +// generate actually different code based on it. In particular, we generate code +// for a fixed-point tanh() implementation for that format, which internally +// uses a fixed-point exp() implementation, which internally uses a +// barrel-shifter with a number of steps that depends on StateIntegerBits. +// Another consequence of that is that a higher value of StateIntegerBits +// results in a more expensive implementation (more barrel shifter steps +// needed). +// +// +// === Why [-8, 8] for fully-connected output? === +// +// This array is only fed to Logistic and Tanh functions, for which +// the quantized implementation will want to use fixed-point arithmetic, +// requiring a power-of-two representation interval. 
Thus, we should right +// away quantize this array to a power-of-two interval; otherwise, +// implementation will need to rescale that, losing any benefit that a tighter +// representation interval might otherwise yield, while introducing some +// numerical error and computational overhead. +// +// Now, Logistic and Tanh +// are nearly constant (nearly equal to their horizontal asymptotes) +// outside of a small bounded interval around 0: +// +// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 +// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 +// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 +// +// From this, we see that clamping to [-4, 4] would be too inaccurate +// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) +// while clamping to [-16, 16] would make no difference even in float32. +// However, for a fixed-point implementation in 16-bit integers, using 5 +// integer bits to represent the [-16, 16] range would leave only 11 +// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive +// representable values. Notice that is higher than the +// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. +// Using [-8, 8] thus seems like the better compromise overall, enjoying +// an increment of 2.4e-4 between representable values and a worst-case +// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with +// [-16, 16]. +// +// Moreover, all other things being equal, it is nice to choose the narrower +// representation range, as that makes the implementation of fixed-point +// math functions a little cheaper (each integer bit requires an additional +// barrel-shifter step in the implementation of exp(-x)). That is further +// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make +// sense for 32-bit float or 32-bit fixed-point quantization, but we are +// aiming for 16-bit fixed-point quantization of these internal nodes here. 
+// +template +inline void +LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, + const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, + const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, + const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, + const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, + int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, + uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, + uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, + int16 *activ_temp_data_int16, void *gemmlowp_context) +{ + (void)gemmlowp_context; // only used in optimized code. + int32 weights_zero_point = params.weights_zero_point; + int32 accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape 
output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + // Gather dimensions information, and perform consistency checks. + const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape, + output_state_shape, output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); + const int fc_output_depth = + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); + + // Depth-concatenate prev_activ and input data together. 
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8}; + const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data, + concat_temp_shape, concat_temp_data_uint8); + + // Implementation of the fully connected node inside the LSTM cell. + // The operands are 8-bit integers, the accumulators are internally 32bit + // integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. + for (int b = 0; b < fc_batches; ++b) + { + for (int out_c = 0; out_c < fc_output_depth; ++out_c) + { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data_int32[out_c]; + // Accumulation loop. + for (int d = 0; d < fc_accum_depth; ++d) + { + int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128; + int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; + accum += input_val * weights_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, using 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); + // Saturate, cast to int16, and store to the temporary activations array. + accum = std::max(-32768, std::min(32767, static_cast(accum))); + activ_temp_data_int16[out_c + fc_output_depth * b] = accum; + } + } + + // Rest of the LSTM cell: tanh and logistic math functions, and some adds + // and muls, all done in 16-bit fixed-point. 
+ for (int b = 0; b < outer_size; ++b) + { + for (int c = 0; c < output_depth; ++c) + { + // Define the fixed-point data types that we will use here. All use + // int16 as the underlying integer type i.e. all are 16-bit fixed-point. + // They only differ by the number of integral vs. fractional bits, + // determining the range of values that they can represent. + // + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8]. + // This is the range of the previous fully-connected node's output, + // which is our input here. + using F3 = gemmlowp::FixedPoint; + // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, + // 2^StateIntegerBits]. It's used to represent the internal state, whose + // number of integer bits is currently dictated by the model. See comment + // on the StateIntegerBits template parameter above. + using FS = gemmlowp::FixedPoint; + // Implementation of input gate, using fixed-point logistic function. + F3 input_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); + F0 input_gate_output = gemmlowp::logistic(input_gate_input); + // Implementation of input modulation gate, using fixed-point tanh + // function. + F3 input_modulation_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); + F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input); + // Implementation of forget gate, using fixed-point logistic function. + F3 forget_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); + F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); + // Implementation of output gate, using fixed-point logistic function. 
+ F3 output_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); + F0 output_gate_output = gemmlowp::logistic(output_gate_input); + // Implementation of internal multiplication nodes, still in fixed-point. + F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output; + FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); + FS prev_state_times_forget_state = forget_gate_output * prev_state; + // Implementation of internal addition node, saturating. + FS new_state = + gemmlowp::SaturatingAdd(gemmlowp::Rescale(input_times_input_modulation), + prev_state_times_forget_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); + // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. + output_state_data_int16[b * output_depth + c] = new_state.raw(); + // Down-scale the output activations to 8-bit integers, saturating, + // and store back to memory. 
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); + int16 clamped_output_activ = + std::max(-128, std::min(127, rescaled_output_activ)); + output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ; + } + } +} + +template +void Split(const SplitParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape *const *output_shapes, Scalar *const *output_data) +{ + ruy::profiler::ScopeLabel label("Split"); + const int split_dimensions = input_shape.DimensionsCount(); + int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis; + int outputs_count = params.num_split; + TFLITE_DCHECK_LT(axis, split_dimensions); + + int64_t split_size = 0; + for (int i = 0; i < outputs_count; i++) + { + TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions); + for (int j = 0; j < split_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*output_shapes[i], j, input_shape, j); + } + } + split_size += output_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + // For all output arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < split_dimensions; ++i) + { + base_inner_size *= input_shape.Dims(i); + } + + const Scalar *input_ptr = input_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < outputs_count; ++i) + { + const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar)); + input_ptr += copy_size; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) +{ + return (b * height + h) * width + w; +} + +inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + const RuntimeShape &input_shape, 
const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + for (int c = 0; c < depth; ++c) + { + const int begin_input_c = std::max(0, static_cast(c - op_params.range)); + const int end_input_c = std::min(depth, static_cast(c + op_params.range)); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) + { + const float input_val = input_data[i * depth + input_c]; + accum += input_val * input_val; + } + const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta); + output_data[i * depth + c] = input_data[i * depth + c] * multiplier; + } + } +} + +inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; i++) + { + output_data[i] = static_cast(input_data[i]); + } +} + +inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, + const float *input_data, const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("FakeQuant"); + float rmin = op_params.minmax.min; + float rmax = op_params.minmax.max; + int num_bits = op_params.num_bits; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. 
+ int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size); +} + +// Common subroutine for both `GatherNd` and `GatherNdString`. +struct GatherNdHelperResult +{ + int n_slices; + int slice_size; + int indices_nd; + std::vector dims_to_count; +}; + +// Returns common values being used on both `GatherNd` and `GatherNdString`. +inline GatherNdHelperResult GatherNdHelper(const RuntimeShape ¶ms_shape, + const RuntimeShape &indices_shape) +{ + GatherNdHelperResult ret; + ret.n_slices = 1; + ret.slice_size = 1; + const int indices_dims = indices_shape.DimensionsCount(); + ret.indices_nd = indices_shape.Dims(indices_dims - 1); + const int params_dims = params_shape.DimensionsCount(); + for (int i = 0; i < indices_dims - 1; ++i) + { + ret.n_slices *= indices_shape.Dims(i); + } + for (int i = ret.indices_nd; i < params_dims; ++i) + { + ret.slice_size *= params_shape.Dims(i); + } + + int remain_flat_size = params_shape.FlatSize(); + ret.dims_to_count = std::vector(ret.indices_nd, 0); + for (int i = 0; i < ret.indices_nd; ++i) + { + ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i); + remain_flat_size = ret.dims_to_count[i]; + } + + return ret; +} + +template +inline void GatherNd(const RuntimeShape ¶ms_shape, const ParamsT *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, ParamsT *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNd"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * 
res.dims_to_count[j]; + } + std::memcpy(output_data + i * res.slice_size, params_data + from_pos, + sizeof(ParamsT) * res.slice_size); + } +} + +#ifndef TF_LITE_STATIC_MEMORY +template +inline void GatherNdString(const RuntimeShape ¶ms_shape, const TfLiteTensor *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, TfLiteTensor *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNdString"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + DynamicBuffer buffer; + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + for (int j = 0; j < res.slice_size; ++j) + { + buffer.AddString(GetString(params_data, from_pos + j)); + } + } + buffer.WriteToTensor(output_data, /*new_shape=*/nullptr); +} +#endif + +template +inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &updates_shape, const UpdatesT *updates_data, + const RuntimeShape &output_shape, UpdatesT *output_data) +{ + ruy::profiler::ScopeLabel label("ScatterNd"); + + int n_slices = 1; + int slice_size = 1; + const int outer_dims = indices_shape.DimensionsCount() - 1; + const int indices_nd = indices_shape.Dims(outer_dims); + const int updates_dims = updates_shape.DimensionsCount(); + for (int i = 0; i < outer_dims; ++i) + { + n_slices *= indices_shape.Dims(i); + } + for (int i = outer_dims; i < updates_dims; ++i) + { + slice_size *= updates_shape.Dims(i); + } + + int output_flat_size = output_shape.FlatSize(); + int remain_flat_size = output_flat_size; + std::vector dims_to_count(indices_nd, 0); + for (int i = 0; i < indices_nd; ++i) + { + dims_to_count[i] = remain_flat_size / output_shape.Dims(i); + remain_flat_size = dims_to_count[i]; + } + + memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); + for (int i = 0; i < 
n_slices; ++i) + { + int to_pos = 0; + for (int j = 0; j < indices_nd; ++j) + { + IndicesT idx = indices_data[i * indices_nd + j]; + TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); + to_pos += idx * dims_to_count[j]; + } + for (int j = 0; j < slice_size; j++) + { + output_data[to_pos + j] += updates_data[i * slice_size + j]; + } + } +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const RuntimeShape &output_shape, SequentialTensorWriter *writer) +{ + const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); + TFLITE_DCHECK_LE(op_params.begin_count, 5); + TFLITE_DCHECK_LE(op_params.size_count, 5); + const int begin_count = op_params.begin_count; + const int size_count = op_params.size_count; + // We front-pad the begin and size vectors. + std::array start; + std::array stop; + for (int i = 0; i < 5; ++i) + { + int padded_i = 5 - i; + start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; + stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1) + ? 
ext_shape.Dims(i) + : start[i] + op_params.size[size_count - padded_i]; + } + + for (int i0 = start[0]; i0 < stop[0]; ++i0) + { + for (int i1 = start[1]; i1 < stop[1]; ++i1) + { + for (int i2 = start[2]; i2 < stop[2]; ++i2) + { + for (int i3 = start[3]; i3 < stop[3]; ++i3) + { + for (int i4 = start[4]; i4 < stop[4]; ++i4) + { + writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); + } + } + } + } + } +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + SequentialTensorWriter writer(input_data, output_data); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output) +{ + SequentialTensorWriter writer(input, output); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template +void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto min_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. 
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto max_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. + Maximum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater()); +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template +inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, + const RuntimeShape &input2_shape, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + // Drop shape of second input: not needed. + ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template +void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + int64_t flatsize; + // Allow select operator executions on mixed scalar tensors and one element + // tensors. 
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 && + input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) + { + flatsize = 1; + } + else + { + flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); + } + for (int64_t i = 0; i < flatsize; ++i) + { + output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +template +void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int64_t outer_size = input_condition_shape.FlatSize(); + int64_t inner_size; + if (input_condition_shape.DimensionsCount() == 0) + { + inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape); + } + else + { + TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size); + inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); + } + + int64_t offset = 0; + for (int64_t i = 0; i < outer_size; i++) + { + const T *input_data = input_condition_data[i] ? 
input_x_data : input_y_data; + memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); + offset += inner_size; + } +} + +template +void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); + + const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape); + + NdArrayDesc<4> desc_condition; + NdArrayDesc<4> desc_x; + NdArrayDesc<4> desc_y; + NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape, + &desc_condition, &desc_x, &desc_y); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c); + const int x_index = SubscriptToIndex(desc_x, b, y, x, c); + const int y_index = SubscriptToIndex(desc_y, b, y, x, c); + output_data[Offset(extended_output_shape, b, y, x, c)] = + input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; + } + } + } + } +} + +template +void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data, + T *output_data) +{ + const size_t size = input_condition_shape.FlatSize(); + if (size == 0) + { + // Dimension is zero, in which case we don't need to output. + return; + } + const size_t cond_rank = input_condition_shape.DimensionsCount(); + + std::vector dims_to_count(cond_rank, 0); + int cur_flat_size = size; + for (int i = 0; i < cond_rank; ++i) + { + dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i); + cur_flat_size = dims_to_count[i]; + } + + int output_index = 0; + for (int i = 0; i < size; ++i) + { + if (input_condition_data[i]) + { + // Insert the coordinate of the current item (row major) into output. + int flat_index = i; + for (int j = 0; j < cond_rank; ++j) + { + int coord_j = flat_index / dims_to_count[j]; + output_data[output_index * cond_rank + j] = coord_j; + flat_index %= dims_to_count[j]; + } + output_index++; + } + } +} + +// For easy implementation, the indices is always a vector of size-4 vectors. 
+template +inline void SparseToDense(const std::vector> &indices, const T *values, + T default_value, bool value_is_scalar, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int value_count = indices.size(); + + // First fill the output_data with default value. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; ++i) + { + output_data[i] = default_value; + } + + // Special handle for value is scalar case to avoid checking the boolean + // condition within the loop every time. + if (value_is_scalar) + { + for (int i = 0; i < value_count; ++i) + { + const std::vector &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = *values; // just use the first value. + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } + return; + } + + // Go through the values and indices to fill the sparse values. 
+ for (int i = 0; i < value_count; ++i) + { + const std::vector &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = values[i]; + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } +} + +template +inline void Pow(const RuntimeShape &input1_shape, const T *input1_data, + const RuntimeShape &input2_shape, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = std::pow(input1_data[i], input2_data[i]); + } +} + +template +inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data, + const RuntimeShape &unextended_input2_shape, const T *input2_data, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1, + &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) + { + for (int y = 0; y < output_shape.Dims(1); ++y) + { + for (int x = 0; x < output_shape.Dims(2); ++x) + { + for (int c = 0; c < output_shape.Dims(3); ++c) + { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = std::pow(in1_val, in2_val); + } + } + } + } +} + +template +void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar 
*output_data) +{ + ruy::profiler::ScopeLabel label("Reverse"); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_axis = input_shape.Dims(axis); + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_axis; ++j) + { + const int start_pos = (i * dims_at_axis + j) * copy_size; + Scalar *output_ptr = output_data + start_pos; + int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template +void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim, + const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("ReverseSequence"); + + int outer_size = 1; + int outer_dim = std::min(batch_dim, seq_dim); + int medium_dim = std::max(batch_dim, seq_dim); + for (int i = 0; i < outer_dim; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int medium_size = 1; + for (int i = outer_dim + 1; i < medium_dim; ++i) + { + medium_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_outer_dim = input_shape.Dims(outer_dim); + const int dims_at_medium_dim = input_shape.Dims(medium_dim); + + Scalar *output_ptr; + if (batch_dim > seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + int sl 
= seq_lengths[q] - 1; + if (j > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size; + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } + else if (batch_dim < seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + int sl = seq_lengths[j] - 1; + const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + if (q > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } +} + +template +inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape); + + memset(output_data, 0, sizeof(T) * output_shape.FlatSize()); + + for (int i = 0; i < input_shape.Dims(0); i++) + { + int output_index = segment_ids_data[i]; + for (int j = 0; j < segment_flat_size; ++j) + { + output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j]; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git a/compiler/luci-interpreter/src/core/KernelParams.h 
b/compiler/luci-interpreter/src/core/KernelParams.h index 958fd4b..6c0220c 100644 --- a/compiler/luci-interpreter/src/core/KernelParams.h +++ b/compiler/luci-interpreter/src/core/KernelParams.h @@ -170,6 +170,11 @@ struct ResizeNearestNeighborParams bool half_pixel_centers; }; +struct ShapeParams +{ + loco::DataType out_type; +}; + struct SubParams { Activation activation; diff --git a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp new file mode 100644 index 0000000..e09d633 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `dims` and `value` as the kernel inputs and `output` as its only output.
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+  : Kernel({dims, value}, {output})
+{
+}
+
+// Reads the requested output dimensions from the dims tensor, interpreting its
+// data as elements of type T, and resizes the output tensor to that shape.
+// Throws std::runtime_error when any requested dimension is negative.
+template <typename T> void Fill::configureShape()
+{
+  const auto dims_data = getTensorData<T>(dims());
+  // The dims tensor is 1-D (checked in configure()); its length is the output rank.
+  Shape output_shape(dims()->shape().dim(0));
+
+  for (int i = 0; i < output_shape.num_dims(); ++i)
+  {
+    T data = dims_data[i];
+    if (data < 0)
+      throw std::runtime_error("Fill dimensions must be >= 0");
+
+    output_shape.dim(i) = data;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Validates the inputs and resizes the output to the shape stored in the dims
+// tensor. The dims tensor must be 1-D with S32 or S64 elements and the value
+// tensor must be a scalar. For S8/S16 values the scale and zero point must
+// match those of the output, and S16 additionally requires a zero point of 0.
+void Fill::configure()
+{
+  const auto dims_shape = dims()->shape();
+  const auto value_shape = value()->shape();
+
+  // Make sure the 1st input tensor is 1-D
+  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+  // Make sure the 1st input tensor is int32 or int64
+  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+                         dims()->element_type() == DataType::S64);
+
+  // Make sure the 2nd input tensor is a scalar
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+  // Check zero point and scale for S16 and S8
+  if (value()->element_type() == loco::DataType::S16 or
+      value()->element_type() == loco::DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+    if (value()->element_type() == loco::DataType::S16)
+      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+  }
+  // Resize output
+  switch (dims()->element_type())
+  {
+    case DataType::S32:
+      configureShape<int32_t>();
+      break;
+    case DataType::S64:
+      configureShape<int64_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Fill::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::S8:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()),
+                                  getTensorShape(output()),
getTensorData(output())); + break; + case DataType::S16: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::S32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::S64: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::FLOAT32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h new file mode 100644 index 0000000..184f0cb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FILL_H +#define LUCI_INTERPRETER_KERNELS_FILL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Fill : public Kernel +{ +public: + Fill(const Tensor *dims, const Tensor *value, Tensor *output); + + const Tensor *dims() const { return _inputs[0]; } + const Tensor *value() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void configureShape(); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FILL_H diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp new file mode 100644 index 0000000..cf56df5 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Fill.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FillTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +template void runFillIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector dims_data = {2, 3}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor
(/*scalar*/ {}, value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + + std::vector ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +template void runFillQuantIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector dims_data = {2, 3}; + std::vector value_data = {5}; + + int32_t zero_point = 0; + + if (DT == loco::DataType::S8) + zero_point = 1; + + Tensor dims = makeInputTensor(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor
(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point, + value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + + std::vector ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, FillInt) +{ + // Run for int32_t input + runFillIntKernel(_memory_manager.get()); + // Run for int64_t input + runFillIntKernel(_memory_manager.get()); + // Run for int8_t input + runFillQuantIntKernel(_memory_manager.get()); + // Run for int16_t input + runFillQuantIntKernel(_memory_manager.get()); + + SUCCEED(); +} + +TEST_F(FillTest, FillFloat) +{ + Shape dims_shape{3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5, 5, 5}; + + std::vector ref_output_shape{2, 2, 2}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, Invalid_Input_Shape_NEG) +{ + Shape dims_shape{1, 3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor(/*scalar*/ {}, 
value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FillTest, Invalid_Value_Shape_NEG) +{ + Shape dims_shape{3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = makeInputTensor({1}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp index 2fbeefc..bae1eac 100644 --- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp @@ -19,6 +19,8 @@ #include "kernels/Utils.h" +#include + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp index 6fee938..42aab33 100644 --- a/compiler/luci-interpreter/src/kernels/Pack.cpp +++ b/compiler/luci-interpreter/src/kernels/Pack.cpp @@ -76,9 +76,8 @@ void Pack::configure() } } - if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 || - t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 || - t0->element_type() == DataType::S64) + if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 || + t0->element_type() == DataType::S16) { LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point()); LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp index 2404e43..d16320b 100644 --- 
a/compiler/luci-interpreter/src/kernels/Pack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp @@ -38,18 +38,26 @@ void Check(std::vector> input_shapes, std::vector tmp_inputs; for (int i = 0; i < input_datas.size(); i++) { - if (std::is_same::value) + if (std::is_same::value || std::is_same::value || + std::is_same::value) { tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, "")); memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } - else + else if (std::is_same::value || std::is_same::value) { tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } + else + { + assert((std::is_same::value) && "unexpected dtype is tested"); + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } } for (int i = 0; i < input_datas.size(); i++) { @@ -57,10 +65,14 @@ void Check(std::vector> input_shapes, } Tensor output_tensor = makeOutputTensor(element_type); - if (!std::is_same::value) + if (std::is_same::value || std::is_same::value) { output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128); } + else if (std::is_same::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f, 0); + } PackParams params{}; params.axis = axis; @@ -79,7 +91,7 @@ template class PackTest : public ::testing::Test { }; -using DataTypes = ::testing::Types; +using DataTypes = ::testing::Types; TYPED_TEST_SUITE(PackTest, DataTypes); TYPED_TEST(PackTest, ThreeInputs) diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp index fe17288..c07f6e3 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Pad.cpp @@ -20,6 +20,8 @@ #include +#include + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp index e904692..197cdaa 100644 --- a/compiler/luci-interpreter/src/kernels/PadV2.cpp +++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp @@ -20,6 +20,8 @@ #include +#include + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp new file mode 100644 index 0000000..d58cd15 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ReduceMax.h" + +#include "kernels/Utils.h" + +#include + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +// Returns the number of axes that will be reduced. Removes duplicates. +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? 
axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; + } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms) + : KernelWithParams({input, axes}, {output, temp_index, resolved_axes}, params) +{ +} + +void ReduceMax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + 
LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData(axes()); + int num_axes = axes()->shape().num_elements(); + LUCI_INTERPRETER_CHECK(num_axes <= 4); + + // We compute shapes of outputs in configure, assuming that outputs have + // static shape + // TODO Support dynamic shape + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); +} + +void ReduceMax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + // TODO Support quantized kernels + default: + throw std::runtime_error("Unsupported type."); + } +} + +void ReduceMax::evalFloat() const +{ + const auto *axes_data = getTensorData(axes()); + int num_axes = axes()->shape().num_elements(); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + int num_resolved_axis = 0; + LUCI_INTERPRETER_CHECK( + tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes, + getTensorData(resolved_axes), &num_resolved_axis)); + + float init_value = std::numeric_limits::lowest(); + tflite::reference_ops::ReduceGeneric( + getTensorData(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData(output()), getTensorShape(output()).DimsData(), + output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims, + getTensorData(temp_index), getTensorData(resolved_axes), init_value, + [](const float current, const float in) -> float { return (in > current) ? 
in : current; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h new file mode 100644 index 0000000..25a6627 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H +#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class ReduceMax : public KernelWithParams +{ +public: + ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp new file mode 100644 index 0000000..ab68882 --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ReduceMax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReduceMaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(ReduceMaxTest, FloatNotKeepDims) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + 
_memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{23, 24}; + std::initializer_list ref_output_shape{2}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ReduceMaxTest, FloatKeepDims) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{20, 22, 24}; + std::initializer_list ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp new file mode 100644 index 0000000..0429fe1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Shape.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams ¶ms) + : KernelWithParams({input}, {output}, params) +{ +} + +void ShapeKernel::configure() +{ + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or + output()->element_type() == DataType::S64); + const auto input_shape = input()->shape(); + + Shape output_shape(1); + output_shape.dim(0) = input_shape.num_dims(); + + output()->resize(output_shape); +} + +void ShapeKernel::execute() const +{ + switch (params().out_type) + { + case DataType::S32: + evalInt(); + break; + case DataType::S64: + evalInt(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template void ShapeKernel::evalInt() const +{ + const auto input_shape = input()->shape(); + + auto output_data = getTensorData(output()); + + for (int i = 0; i < input_shape.num_dims(); ++i) + { + output_data[i] = input_shape.dim(i); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h new file mode 100644 index 0000000..cfaadec --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H +#define LUCI_INTERPRETER_KERNELS_SHAPE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ShapeKernel : public KernelWithParams +{ +public: + ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void evalInt() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp new file mode 100644 index 0000000..4763e01 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Shape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ShapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +template void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager) +{ + Shape input_shape{1, 3, 1, 3, 5}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(dataType); + + ShapeParams params{}; + params.out_type = dataType; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{1, 3, 1, 3, 5}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + + std::vector ref_output_shape{5}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ShapeTest, OutTypeInt) +{ + + // Run for int32_t output + runShapeKernel(loco::DataType::S32, _memory_manager.get()); + // Run for int64_t output + runShapeKernel(loco::DataType::S64, _memory_manager.get()); + + SUCCEED(); +} + +TEST_F(ShapeTest, Invalid_Output_Type_NEG) +{ + Shape input_shape{1, 3}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + 
ShapeParams params{}; + params.out_type = loco::DataType::FLOAT32; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp index 2819882..aa68208 100644 --- a/compiler/luci-interpreter/src/kernels/SplitV.cpp +++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp @@ -43,14 +43,36 @@ void SplitV::configure() auto sizes_data = getTensorData(size_splits()); assert(size_splits()->shape().num_dims() == 1); + + int32_t sum = 0; + const auto num_dims_size_spits = size_splits()->shape().dim(0); + int32_t count_neg_dim = 0; + + for (int32_t i = 0; i < num_dims_size_spits - 1; ++i) + { + if (sizes_data[i] != -1) + { + sum += sizes_data[i]; + } + else + { + count_neg_dim++; + } + } + assert(count_neg_dim < 2); assert(size_splits()->shape().num_elements() == num_split); - assert(std::accumulate(sizes_data, sizes_data + num_split, 0) == - input()->shape().dim(_axis_value)); auto output_shape = input()->shape(); for (int32_t i = 0; i < num_split; ++i) { - output_shape.dim(_axis_value) = sizes_data[i]; + if (sizes_data[i] == -1) + { + output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum; + } + else + { + output_shape.dim(_axis_value) = sizes_data[i]; + } _outputs[i]->resize(output_shape); } } diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp index c6452cd..a8730d8 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp @@ -136,6 +136,11 @@ void StridedSlice::execute() const getTensorData(input()), getTensorShape(output()), getTensorData(output())); break; + case DataType::S32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData(input()), 
getTensorShape(output()), + getTensorData(output())); + break; default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index dba3905..4020709 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -187,7 +187,7 @@ void GraphLoader::loadTensors() const auto *node = loco::must_cast(_graph->nodes()->at(i)); if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) - throw std::runtime_error("Unknown Custom Node, yet."); + throw std::runtime_error("Unsupported Custom operator. " + node->name()); if (!isTensorProducingNode(node)) continue; diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp index decccaa..501e847 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Add.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleAdd(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp index 0ee3677..f3ca557 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleArgMax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + 
const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); const Tensor *axis = helper.getInputTensor(node->dimension()); diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp index efb0112..a813570 100644 --- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp index aae3dba..9da2f6d 100644 --- a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *lhs = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp index 33d0e2d..ac6ebb3 100644 --- a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter 
std::unique_ptr build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp index 21ea5ce..a16354c 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleCast(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp index 7823a99..ba2564e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); std::vector inputs(node->numValues()); for (uint32_t i = 0; i < node->numValues(); ++i) { diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp index b48d97d..218165e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp +++ 
b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleConv2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp index 0310fb2..1749463 100644 --- a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp index db26ecf..8af1e3b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git 
a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp index 4aae564..787322e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleDequantize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp index 56c2e98..0611dfd 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Div.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleDiv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); const Tensor *input2 = helper.getInputTensor(node->y()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp index 98ee78b..a79985e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleElu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for 
operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp index 649d9bf..5969288 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp @@ -25,9 +25,7 @@ std::unique_ptr build_kernel_CircleEqual(const luci::CircleNode *circle_ KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp index 411d142..30d11cb 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleExp(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp new file mode 100644 index 0000000..3aefdf1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Fill.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleFill(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const auto dims = helper.getInputTensor(node->dims()); + const auto value = helper.getInputTensor(node->value()); + auto output = helper.getOutputTensor(node); + + return std::make_unique(dims, value, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp index 6d8435f..e0a2231 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleFloor(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp index cae2e18..a45d89e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr 
build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp index 0b8ac44..b7b742b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp index 9df9775..2ee2906 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleGather(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *params = helper.getInputTensor(node->params()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp index 3db11b8..80aa63c 100644 --- 
a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleGreater(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp index dbe051d..272f284 100644 --- a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp index 5983f4d..3ac7d49 100644 --- a/compiler/luci-interpreter/src/loader/nodes/If.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleIf(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); auto output_nodes = collectOutputNodes(node); assert(node->arity() == 1 + node->input_count()); assert(output_nodes.size() == 
static_cast(node->output_count())); diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp index 0a8fb85..06031e5 100644 --- a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp index 05f9202..6e22e6d 100644 --- a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp index 0e70afa..95b5589 100644 --- a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw 
std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp index 7b229ad..bbf5067 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp index 81156f2..ae914ec 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Less.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLess(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp index 82141e5..f1b424b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr 
build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp index a12dce0..962ca2d 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -25,9 +25,7 @@ std::unique_ptr build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp index 6cf547a..4322041 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->logits()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp 
b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp index 2c9549f..bf3cb67 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp index 3d327d6..fefcd9a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp index 50566bb..a416cb4 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); 
assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp index e4160ed..4a69dee 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleLogistic(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp index 914f228..f66a206 100644 --- a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp index dc50d67..d0bff77 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMaximum(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == 
nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp index 97d9120..0dec63e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMean(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp index ff65952..1a49c10 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMinimum(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp index ebf2945..b221b45 100644 --- a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMirrorPad(const luci::CircleNode 
*circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp index 4f9da96..f998485 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleMul(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp index 23c0053..9a9ecf9 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleNeg(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp index 8e5711f..3916a58 100644 --- a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter 
std::unique_ptr build_kernel_CircleNotEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp index e31601b..f3d700c 100644 --- a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CirclePRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp index 6994720..efc5850 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CirclePack(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == node->values_count()); std::vector inputs(node->values_count()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp index 7705492..67ce997 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp +++ 
b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CirclePad(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp index 12deb15..e378a97 100644 --- a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CirclePadV2(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp index b430bc9..d32fc3d 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CirclePow(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp index fd98363..cb36fb6 
100644 --- a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -24,9 +24,8 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleQuantize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp new file mode 100644 index 0000000..1a8522d --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ReduceMax.h" + +namespace luci_interpreter +{ +/* Builds the ReduceMax kernel for circle_node and registers the two S32 scratch tensors it takes. */ +std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + /* Scratch tensors: S32, empty shape, non-observable, null data buffer; moved into the runtime graph. */ + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp index d53a66a..1d64c1c 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git 
a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp index f1b5d21..e50cd25 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleRelu6(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp index 89e3ece..76ddd88 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleReshape(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->tensor()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp index dca5658..dc2b88a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto 
*node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp index d1ea19c..c7058ae 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp @@ -25,9 +25,7 @@ std::unique_ptr build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp index ea00f54..c1a7f53 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleReverseV2(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->tensor()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp index ff87f43..0714a5d 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleRsqrt(const luci::CircleNode *circle_node, KernelBuilderHelper 
&helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp index 89528d5..d172ef4 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -24,9 +24,8 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSVDF(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 5); const Tensor *input = helper.getInputTensor(node->input()); const Tensor *feature = helper.getInputTensor(node->weight_feature()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp new file mode 100644 index 0000000..d1edbc7 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Shape.h" + +namespace luci_interpreter +{ +/* Builds the Shape kernel for circle_node; the kernel's out_type is copied from node->out_type(). */ +std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node); + assert(node->arity() == 1); + + const auto input = helper.getInputTensor(node->input()); + auto output = helper.getOutputTensor(node); + + ShapeParams shape_params{}; + shape_params.out_type = node->out_type(); + /* NOTE(review): kernel class assumed to be kernels::ShapeKernel - confirm against kernels/Shape.h. */ + return std::make_unique<kernels::ShapeKernel>(input, output, shape_params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp index 741cd08..60ac641 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSlice(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp index b15e4b6..f41f63f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSoftmax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->logits()); diff --git 
a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp index 91c237a..b6e6cf5 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp index 3cbbd97..63fdb95 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp index 32553ad..3f6d4a7 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Split.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSplit(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for 
operation"); + const auto *node = loco::must_cast(circle_node); auto output_nodes = collectOutputNodes(node); assert(node->arity() == 2); assert(output_nodes.size() == static_cast(node->num_split())); diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp index d788164..0788822 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSplitV(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); auto output_nodes = collectOutputNodes(node); assert(node->arity() == 3); assert(output_nodes.size() == static_cast(node->num_split())); diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp index 56dd986..b9843fe 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSqrt(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp index 43aadb9..0ad7c17 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Square.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSquare(const 
luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp index 6a2717a..e4c6fd8 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp index 583ff93..6885f80 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp index fe5fa77..359b4e3 100644 --- 
a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 4); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp index bad4fbb..a6252cb 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleSub(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp index f425529..a58ef60 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleTanh(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp 
b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp index 4e095fb..ea17d83 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleTranspose(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->a()); diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp index 1b954c3..d773e30 100644 --- a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); assert(node->arity() == 4); const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp index 978c738..a1c0d32 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleUnpack(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); auto 
output_nodes = collectOutputNodes(node); assert(node->arity() == 1); assert(output_nodes.size() == static_cast(node->num())); diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp index 284dc0c..8fde6ec 100644 --- a/compiler/luci-interpreter/src/loader/nodes/While.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr build_kernel_CircleWhile(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast(circle_node); auto output_nodes = collectOutputNodes(node); assert(node->arity() == node->input_count()); diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt index c8a2e12..642cf14 100644 --- a/compiler/luci-micro/CMakeLists.txt +++ b/compiler/luci-micro/CMakeLists.txt @@ -15,7 +15,7 @@ set(CMAKE_ARM_OPTIONS -DLUCI_STATIC=ON -DBUILD_CMSIS_NN_FUNCTIONS=ON -DTARGET_CPU=cortex-m7 - "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake" + "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake" "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu" "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}" "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}" diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt new file mode 100644 index 0000000..1f7acee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") +if (NOT LUCI_INTERPRETER_PAL_DIR) + set(LUCI_INTERPRETER_PAL_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/pal/linux") +endif() + +set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst) + +if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX) + set(LUCI_INTERPRETER_SUFFIX "") +else() + set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX}) +endif() + +add_subdirectory(src) diff --git a/compiler/luci-micro/luci-interpreter/README.md b/compiler/luci-micro/luci-interpreter/README.md new file mode 100644 index 0000000..77ec5c8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/README.md @@ -0,0 +1,158 @@ +# luci-interpreter + +`luci-interpreter` is an inference engine for neural networks represented in luci IR. +See `compiler/luci/lang` directory for details about IR. +You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`. + +`luci-interpreter` provides: +- Basic inference functionality, input setters and output getters +- Interface for inspecting hidden interpreter state, like activation values during inference +- Customization mechanisms to fit the interpreter to specific platforms, like MCUs + +Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory + +## Basic usage + +Minimal usage includes: +- Setting input data +- Running inference +- Fetching inference results + +Interpreter object is reusable and can run multiple inferences. +Elements in tensors (input/output/internal) are stored contiguously and have C-like layout: +This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1. + +Input and output tensors have the same indexes as in original luci model. 
+ +**Usage example:** +``` c++ +// Note getTensorSize is a function that computes tensor size, +// it is not part of interpreter and should be implemented by user + +luci_interpreter::Interpreter interpreter(luci_module); + +// Set inputs +// assuming model has only one input and one output +const auto input_nodes = loco::input_nodes(module->graph()); + +const auto *input_node = dynamic_cast(input_nodes[0]); +std::vector input_data(getTensorSize(input_node)); +// Initialize input data here + +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +// Start inference +interpreter.interpret(); + +// Fetch inference results +const auto output_nodes = loco::output_nodes(module->graph()); +const auto *output_node = dynamic_cast(output_nodes[0]); +std::vector output_data(getTensorSize(output_node)); +interpreter.readOutputTensor(output_node, output_data.data(), output_data.size()); +``` + +## Inspecting intermediate state + +Interpreter provides interfaces to investigate internal state of interpreter during inference. + +This is done by "observer" mechanism: +- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object +- `ExecutionObserver` defines several callback methods user can override to inject custom code + +ExecutionObserver provides three callbacks: +- `postTensorWrite` checks contents of output tensor after operation execution +- `preOperatorExecute` notifies that interpreter is going to execute operation +- `postOperatorExecute` notifies that interpreter has finished execution of an operation + +See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details. 
+ +**Usage example:** +``` c++ +class CustomExecutionObserver: public luci_interpreter::ExecutionObserver +{ +public: + void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override + { + if (tensor->element_type() != loco::DataType::FLOAT32) + return; + for (int i = 0; i < tensor->shape().num_elements(); ++i) + std::cout << tensor->data<float>()[i] << ", "; + } + + // User observer can override only needed methods, + // others will inherit empty implementation from base observer. + + // void preOperatorExecute(const luci::CircleNode *node); + // void postOperatorExecute(const luci::CircleNode *node); +}; + +luci_interpreter::Interpreter interpreter(module); +CustomExecutionObserver observer; +interpreter.attachObserver(&observer); + +// initialize input_data +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +interpreter.interpret(); +``` + +## Customizing inference + +### Memory manager + +Interpreter provides a handle for altering default memory management mechanisms. + +This is done by `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details. + +This header contains `IMemoryManager` abstract class which is responsible for allocation and deallocation of tensors' memory. + +User can construct an interpreter with one of predefined memory managers or their own custom memory manager. +Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object. + +List of predefined memory managers: +- `SimpleMemoryManager` This is a simple wrapper around new/delete, default one. +- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests. +- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete. +- `StaticMemoryManager` Uses precomputed memory allocation plan.
Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs). + +**SimpleMemoryManager usage example:** + +This memory manager is used by default, so nothing needs to be selected. +``` c++ +luci_interpreter::Interpreter interpreter(module); +``` + +**TestMemoryManager usage example:** + +``` c++ +luci_interpreter::TestMemoryManager mm; +luci_interpreter::Interpreter interpreter(module, &mm); +``` + +**BuddyMemoryManager usage example:** + +`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation. + +This allocator uses an external buffer as a memory pool. This allows static memory arrays to be used for allocations. + +Limitations +- Current implementation uses only the lower power-of-two number of bytes of the given buffer. + + For example, for a 1000-byte buffer, only the lower 512 bytes will be used. +- Current implementation can handle a memory pool of at most 4 gigabytes. + +``` c++ + constexpr int buffer_size = 2048; + static uint8_t buffer[buffer_size]; + luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size); + luci_interpreter::Interpreter interpreter(module.get(), &memory_manager); +``` + +**StaticMemoryManager usage example:** +``` c++ +TBD when it is merged +``` + +## Further reading + +If you want to participate in development, please read `DEVELOPER.md` for SW architecture details. diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h new file mode 100644 index 0000000..205baa6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/MemoryManager.h" + +#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H + +namespace luci_interpreter +{ + +class BuddyMemoryManager : public IMemoryManager +{ +public: + BuddyMemoryManager(uint8_t *memory_start, int32_t memSize); + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + struct Block + { + Block *next_free; + bool is_free; + uint32_t size; + // debug field + Block *self; + }; + + Block *_start_block; + int32_t _num_blocks; + uint32_t _size; + Block *_free_blocks[32]{}; + + static int32_t lowerLog2(uint32_t val) + { + int32_t i = 0; + while (val >>= 1) + i++; + + return i; + } + + void addToBlocks(Block *block, int32_t l) + { + if (!block) + return; + + block->next_free = _free_blocks[l]; + _free_blocks[l] = block; + } + + void removeFromBlocks(const Block *block, int32_t l) + { + if (!block) + return; + + Block *tmp = _free_blocks[l]; + + if (block == tmp) + { + _free_blocks[l] = block->next_free; + return; + } + + while (tmp) + { + if (tmp->next_free == block) + { + tmp->next_free = block->next_free; + return; + } + + tmp = tmp->next_free; + } + } + + void divideBlock(Block *block, int32_t l) + { + int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block); + + removeFromBlocks(block, l); + + // there is no need to add to the free_blocks list here + block->is_free = true; + block->size = size; + block->self = block; + + Block *buddy; + buddy = (Block 
*)((uint8_t *)block + sizeof(Block) + size); + buddy->is_free = true; + buddy->size = size; + buddy->self = buddy; + + addToBlocks(buddy, l - 1); + } + + Block *mergeBlock(Block *block) + { + Block *buddy; + + const int32_t l = lowerLog2(block->size + sizeof(Block)); + + const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block); + buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block); + + if (!buddy->is_free || buddy->size != block->size) + return nullptr; + + if (block > buddy) + { + Block *x = block; + block = buddy; + buddy = x; + } + + removeFromBlocks(block, l); + removeFromBlocks(buddy, l); + + block->size = block->size * 2 + sizeof(Block); + block->is_free = true; + block->self = block; + + addToBlocks(block, l + 1); + + return block; + } +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h new file mode 100644 index 0000000..375b1ae --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ +#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ + +#include + +namespace luci_interpreter +{ + +/** + * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from + * model's file. + * + * @warning Use this source only in case when model's buffer alive longer than Interpreter. + */ +std::unique_ptr source_without_constant_copying(); + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h new file mode 100644 index 0000000..8e2f457 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_INTERPRETER_H +#define LUCI_INTERPRETER_INTERPRETER_H + +#include "luci_interpreter/core/Tensor.h" + +#include +#include + +#include "luci_interpreter/MemoryManager.h" +#include + +#include +#include +#include + +namespace luci_interpreter +{ + +class ExecutionObserver +{ +public: + virtual ~ExecutionObserver(); + + // Called when the value of a tensor has been updated during execution. 
+ virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor); + + // Called before / after executing an operator. + // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput, + // CircleConst and Circle*Out). + virtual void preOperatorExecute(const luci::CircleNode *node); + virtual void postOperatorExecute(const luci::CircleNode *node); +}; + +class Interpreter +{ +public: + explicit Interpreter(const luci::Module *module); + + explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager); + + ~Interpreter(); + + void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size); + + void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size); + + void interpret(); + + void attachObserver(ExecutionObserver *observer); + + const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; } + +private: + // _default_memory_manager should be before _runtime_module due to + // the order of deletion in the destructor + std::unique_ptr _default_memory_manager = nullptr; + std::unique_ptr _runtime_module; + + // Observer functionality support. + std::unique_ptr _runtime_to_ir; + std::unordered_map _node_to_tensor; + std::unique_ptr _event_notifier; + std::vector _observers; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_INTERPRETER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h new file mode 100644 index 0000000..f32c520 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_MEMORY_MANAGER_H + +#include "luci_interpreter/core/DataType.h" +#include "luci_interpreter/core/Tensor.h" + +namespace luci_interpreter +{ + +class IMemoryManager +{ +public: + virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0; + virtual void release_memory(luci_interpreter::Tensor &tensor) = 0; + + virtual ~IMemoryManager() = default; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h new file mode 100644 index 0000000..658a1c6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +class SimpleMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h new file mode 100644 index 0000000..ded7bde --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +// Used for allocations in static buffer, using offsets defined in luci model. 
+class StaticMemoryManager : public IMemoryManager +{ +public: + StaticMemoryManager() = delete; + + explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr) + { /* Do nothing */ + } + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + // Stores a pointer to the beginning of the allocated memory buffer. + uint8_t *_buffer_ptr; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h new file mode 100644 index 0000000..397bbed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ +// Memory Manager for using in kernels tests. This eliminates the need to manually delete the +// allocated memory in tests. This mem_manager remembers all its allocations and in destructor +// delete all allocations. 
+class TestMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + + ~TestMemoryManager() override + { + for (auto allocation : allocations) + { + delete[] allocation; + } + } + +private: + std::vector allocations; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h new file mode 100644 index 0000000..27bf719 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H +#define LUCI_INTERPRETER_CORE_DATATYPE_H + +#include +#include + +#include + +namespace luci_interpreter +{ + +using DataType = loco::DataType; + +template using DataTypeImpl = loco::DataTypeImpl
; + +inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); } + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_DATATYPE_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h new file mode 100644 index 0000000..bb9ff6d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @brief Ordered list of int32_t dimension sizes.
 *
 * Index arguments are validated with assert() only, i.e. they are unchecked when
 * NDEBUG is defined.
 */
class Shape
{
public:
  /// Creates a shape with `rank` dimensions, every one of them set to 0.
  explicit Shape(int rank) : _dims(rank, 0) {}

  /// Creates a shape from an explicit list of dimension sizes.
  Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}

  /// Number of dimensions.
  int num_dims() const { return _dims.size(); }

  /// Size of dimension `i`.
  int32_t dim(int i) const
  {
    assert(0 <= i && i < num_dims());
    return _dims[i];
  }

  /// Mutable access to the size of dimension `i`.
  int32_t &dim(int i)
  {
    assert(0 <= i && i < num_dims());
    return _dims[i];
  }

  /// Product of all dimension sizes; 1 when there are no dimensions.
  int32_t num_elements() const
  {
    int32_t count = 1;
    for (auto it = _dims.cbegin(); it != _dims.cend(); ++it)
    {
      count *= *it;
    }
    return count;
  }

  bool operator==(const Shape &other) const { return _dims == other._dims; }

  bool operator!=(const Shape &other) const { return !(*this == other); }

private:
  std::vector<int32_t> _dims;
};
+struct AffineQuantization +{ + std::vector scale; + std::vector zero_point; + int32_t quantized_dimension; +}; + +class Tensor +{ +public: + Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name); + + DataType element_type() const { return _element_type; } + + const Shape &shape() const { return _shape; } + + float scale() const + { + assert(_quantization.scale.size() == 1); + return _quantization.scale[0]; + } + + int32_t zero_point() const + { + assert(_quantization.zero_point.size() == 1); + return _quantization.zero_point[0]; + } + + const std::vector &scales() const { return _quantization.scale; } + + const std::vector &zero_points() const { return _quantization.zero_point; } + + int32_t quantized_dimension() const { return _quantization.quantized_dimension; } + + template const T *data() const + { + static_assert(std::is_same::value or + std::is_same::value); + return reinterpret_cast(_data); + } + + template T *data() + { + static_assert(std::is_same::value or + std::is_same::value); + return reinterpret_cast(_data); + } + + const std::string &name() const { return _name; } + + void readData(void *data_ptr, size_t data_size) const; + + void writeData(const void *data_ptr, size_t data_size); + + void resize(const Shape &new_shape); + + void set_data_buffer(uint8_t *buffer) + { + if (buffer == nullptr) + { + _data_allocated = false; + } + else + { + _data_allocated = true; + } + _data = buffer; + } + + bool is_observable() const { return _is_observable; } + + void set_observable(bool value) { _is_observable = value; } + + bool is_allocatable() const { return _is_allocatable; } + + void set_allocatable(bool value) { _is_allocatable = value; } + + bool is_data_allocated() const { return _data_allocated; } + + int32_t get_offset() const { return _offset; } + + void set_offset(int32_t offset) { _offset = offset; } + +private: + DataType _element_type; + Shape _shape; + AffineQuantization _quantization; + uint8_t *_data; + 
std::string _name; + bool _data_allocated; + // Write of tensor is reported to registered Observers only if this tensor is observable + // This is needed for tensors used in kernel implementation, but not present in original model. + bool _is_observable = true; + // Memory manager is called for tensor only if it is "allocatable". + // Kernel configuration could disable allocation of some tensors if they are not needed for + // particular operation. + bool _is_allocatable = true; + // Used by static memory manager. + // Stores the offset from the beginning of the allocated memory buffer. + int32_t _offset = -1; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_TENSOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst new file mode 100644 index 0000000..f0df58d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -0,0 +1,62 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) 
+REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h new file mode 100644 index 0000000..21e6329 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h new file mode 100644 index 0000000..a274afb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include +#include +#include +#include + +namespace luci_interpreter_pal +{ +/* Generic fallback: fails the "AveragePool NYI" assertion and ignores every argument; only the specialization below does real work. */ template +static inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +/* int8_t path: asserts 4-D shapes, a non-null scratchpad and a batch size of 1, packs the shapes and pool parameters into CMSIS-NN structs, then calls arm_avgpool_s8 with the scratchpad as its context buffer. */ template <> +inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + assert(scratchpad_data != nullptr); + + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + assert(batches == 1); + + const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = 1; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = depth; + + cmsis_nn_dims output_dims; + output_dims.n = 1; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = depth; + + cmsis_nn_pool_params pool_params; + pool_params.stride.h = params.stride_height; + pool_params.stride.w = params.stride_width; + pool_params.padding.h = params.padding_values.height; + pool_params.padding.w = params.padding_values.width; + pool_params.activation.min = params.quantized_activation_min; + pool_params.activation.max = params.quantized_activation_max; + + cmsis_nn_dims
filter_dims; + filter_dims.n = 1; + filter_dims.h = params.filter_height; + filter_dims.w = params.filter_width; + filter_dims.c = 1; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims, + output_data); + assert(res == ARM_MATH_SUCCESS); +} + +/* Sizes the scratchpad used by AveragePool: for S8 input it is resized to arm_avgpool_s8_get_buffer_size(output_width, depth) multiplied by getDataTypeSize(input_data_type); for any other type set_allocatable(false) is called instead. */ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + if (input_data_type == luci_interpreter::DataType::S8) + { + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int32_t output_width = output_shape.Dims(2); + const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth); + auto data_type_size = static_cast(luci_interpreter::getDataTypeSize(input_data_type)); + + luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h new file mode 100644 index 0000000..4dd77ff --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h new file mode 100644 index 0000000..cfb84ea --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include +#include +#include +#include + +namespace luci_interpreter_pal +{ +/* float path: ignores the scratchpad arguments and calls tflite::reference_ops::Conv with an empty RuntimeShape and nullptr in their place. */ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +/* uint8 path: calls tflite::reference_ops::Conv, forwarding the scratchpad shape and data plus a trailing nullptr. NOTE(review): the two (void) casts are redundant here because both arguments are used in the call. */ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, nullptr); +} + +/* int8 per-channel path: with a non-null scratchpad it packs params and shapes into CMSIS-NN structs, hands mult/shifts to CMSIS-NN as the per-channel quantization arrays and calls arm_convolve_wrapper_s8 (1x1 dilation is asserted); with a null scratchpad it falls back to tflite::reference_integer_ops::ConvPerChannel. */ static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, + const int32_t *shifts, const
tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + if (scratchpad_data) + { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + assert(conv_params.dilation.h == 1); + assert(conv_params.dilation.w == 1); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + conv_params.activation.min = params.quantized_activation_min; + conv_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = const_cast(mult); + quant_params.shift = const_cast(shifts); + + assert(conv_params.activation.min <= conv_params.activation.max); + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_shape.Dims(1); + filter_dims.w =
filter_shape.Dims(2); + filter_dims.c = input_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + + auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, + &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); + } +} + +/* Sizes the scratchpad used by ConvPerChannel: for S8 input with 1x1 dilation it is resized to the value returned by arm_convolve_wrapper_s8_get_buffer_size; in every other case set_allocatable(false) is called. */ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 && + conv_params.dilation.w == 1) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; +
conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_height; + filter_dims.w = filter_width; + filter_dims.c = input_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = output_height; + output_dims.w = output_width; + output_dims.c = output_depth; + + const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, + &filter_dims, &output_dims); + + luci_interpreter::Shape scratchpad_shape{buf_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h new file mode 100644 index 0000000..8463e57 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include + +namespace luci_interpreter_pal +{ +/* PAL shim: forwards every argument unchanged to tflite::reference_ops::DepthToSpace. */ template +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h new file mode 100644 index 0000000..120dcd8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include +#include +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel( + const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + if (scratchpad_data) + { + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + assert(dw_conv_params.dilation.h == 1); + assert(dw_conv_params.dilation.w == 1); + + dw_conv_params.input_offset = params.input_offset; + dw_conv_params.output_offset = params.output_offset; + dw_conv_params.stride.h 
= params.stride_height; + dw_conv_params.stride.w = params.stride_width; + dw_conv_params.padding.h = params.padding_values.height; + dw_conv_params.padding.w = params.padding_values.width; + + dw_conv_params.activation.min = params.quantized_activation_min; + dw_conv_params.activation.max = params.quantized_activation_max; + dw_conv_params.ch_mult = params.depth_multiplier; + + cmsis_nn_per_channel_quant_params quant_params; + int32_t output_multiplier = params.output_multiplier; + int32_t output_shift = params.output_shift; + + quant_params.multiplier = &output_multiplier; + quant_params.shift = &output_shift; + + assert(dw_conv_params.activation.min <= dw_conv_params.activation.max); + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(0); + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + + auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims, + input_data, &filter_dims, filter_data, &bias_dims, + bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, 
output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); + } +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams ¶ms, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 && + dw_conv_params.dilation.w == 1) + { + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(0); + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &output_dims); + + auto data_type_size = static_cast(luci_interpreter::getDataTypeSize(input_data_type)); + + luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h 
b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h new file mode 100644 index 0000000..15ff032 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +/* Templated overload: forwards unchanged to tflite::reference_integer_ops::Dequantize. */ template +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize(params, input_shape, input_data, output_shape, + output_data); +} + +/* Non-template uint8_t overload: forwards unchanged to tflite::reference_ops::Dequantize instead of the integer_ops variant used above. */ static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h new file mode 100644
index 0000000..4089d0a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include + +namespace luci_interpreter_pal +{ + +/* PAL shim (float only): forwards every argument unchanged to tflite::reference_ops::Elu. */ static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h new file mode 100644 index 0000000..32e9057 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include +#include +#include + +namespace luci_interpreter_pal +{ +/* Generic fallback: fails the "FullyConnected NYI" assertion and ignores every argument; only the specialization below does real work. */ template +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +/* int8_t path: asserts a 2-D output shape, takes batches and output depth from it and the accumulation depth from the last filter dimension, packs everything into CMSIS-NN structs and calls arm_fully_connected_s8 with per-tensor quantization taken from params. */ template <> +inline void +FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + assert(output_shape.DimensionsCount() == 2); + + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + + const int filter_dim_count = filter_shape.DimensionsCount(); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + + cmsis_nn_fc_params fc_params; + fc_params.input_offset = params.input_offset; + fc_params.output_offset = params.output_offset; +
fc_params.filter_offset = params.weights_offset; + fc_params.activation.min = params.quantized_activation_min; + fc_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_tensor_quant_params quant_params; + quant_params.multiplier = params.output_multiplier; + quant_params.shift = params.output_shift; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = 1; + input_dims.w = 1; + input_dims.c = accum_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = accum_depth; + filter_dims.h = 1; + filter_dims.w = 1; + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = 1; + output_dims.w = 1; + output_dims.c = output_depth; + + /* NOTE(review): unlike the pooling and convolution PAL headers, no scratchpad tensor is passed in here; the CMSIS-NN context buffer is allocated with make_unique on every call. */ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); + auto buffer = std::make_unique(buf_size); + assert(buffer != nullptr); + + cmsis_nn_context ctx; + ctx.buf = buffer.get(); + ctx.size = buf_size; + + auto res = + arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims, + filter_data, &bias_dims, bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h new file mode 100644 index 0000000..f84742a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include + +namespace luci_interpreter_pal +{ +/* PAL shim: forwards every argument unchanged to tflite::reference_ops::L2Normalization. */ template +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h new file mode 100644 index 0000000..38a302f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include + +namespace luci_interpreter_pal +{ +/* PAL shim: forwards every argument unchanged to tflite::reference_ops::L2Pool. */ template +static inline void L2Pool(const tflite::PoolParams ¶ms, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h new file mode 100644 index 0000000..9ccd222 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include + +namespace luci_interpreter_pal +{ +/* PAL shim (float only): forwards every argument unchanged to tflite::reference_ops::LeakyRelu. */ static inline void LeakyRelu(const tflite::LeakyReluParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h new file mode 100644 index 0000000..347a97a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include + +namespace luci_interpreter_pal +{ +/* PAL shim for Mul. Note that it forwards to tflite::reference_ops::BroadcastMul4DSlow, the same callee as the BroadcastMul4DSlow shim below, so both entry points behave identically in this PAL. */ template +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +/* PAL shim: forwards every argument unchanged to tflite::reference_ops::BroadcastMul4DSlow. */ template +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h new file mode 100644 index 0000000..be5903a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h new file mode 100644 index 0000000..6046789 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h new file mode 100644 index 0000000..cc9f0fd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h new file mode 100644 index 0000000..f4d5a6e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h new file mode 100644 index 0000000..a4a5b2a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t memory_size = weight_time_shape.Dims(1); + + cmsis_nn_dims input_dims; + input_dims.n = input_shape.Dims(0); + input_dims.h = input_shape.Dims(1); + + cmsis_nn_dims weights_feature_dims; + weights_feature_dims.n = weight_feature_shape.Dims(0); + weights_feature_dims.h = weight_feature_shape.Dims(1); + + cmsis_nn_dims weights_time_dims; + weights_time_dims.n = weight_time_shape.Dims(0); + weights_time_dims.h = weight_time_shape.Dims(1); + + cmsis_nn_dims bias_dims; + bias_dims.n = bias_shape.Dims(0); + + cmsis_nn_dims state_dims; + state_dims.n = batch_size; + state_dims.h = memory_size * num_filters; + + cmsis_nn_dims output_dims; + output_dims.n = output_shape.Dims(0); + output_dims.h = output_shape.Dims(1); + + cmsis_nn_svdf_params svdf_params; + svdf_params.rank = params.rank; + svdf_params.input_offset = input_zp; + svdf_params.output_offset = output_zp; + + svdf_params.input_activation.min = INT16_MIN; + svdf_params.input_activation.max = INT16_MAX; + + svdf_params.output_activation.min = INT8_MIN; + svdf_params.output_activation.max = INT8_MAX; + 
+ cmsis_nn_per_tensor_quant_params in_quant_params; + in_quant_params.multiplier = scale_1_a; + in_quant_params.shift = scale_1_b; + + cmsis_nn_per_tensor_quant_params out_quant_params; + out_quant_params.multiplier = scale_2_a; + out_quant_params.shift = scale_2_b; + + cmsis_nn_context scratch_ctx; + scratch_ctx.buf = scratchpad_data; + + cmsis_nn_context scratch_output_ctx; + scratch_output_ctx.buf = output_temp_data; + + arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params, + &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims, + weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data, + &output_dims, output_data); +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t input_size = input_shape.Dims(1); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t num_units = num_filters / rank; + const int32_t memory_size = weight_time_shape.Dims(1); + + // Left shift the activation_state. + { + float *new_state_start = activation_state_data; + const float *old_state_start = activation_state_data + 1; + const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Compute conv1d(inputs, weights_feature). 
+ // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. + + // Perform batched matrix vector multiply operation: + { + const float *matrix = weight_feature_data; + const float *vector = input_data; + float *result = &activation_state_data[memory_size - 1]; + float *result_in_batch = result; + for (int i = 0; i < batch_size; ++i) + { + const float *matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) + { + float dot_prod = 0.0f; + const float *vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) + { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + tflite::reference_ops::ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data, + params.activation, activation_state_data, scratchpad_data, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not supported for cmsisnn"); + } 
+ + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h new file mode 100644 index 0000000..6bbda48 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template +static inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation doesn't support on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} + +template <> +inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int32_t mult = params.input_multiplier; + const int32_t shift = params.input_left_shift; + const int32_t diff_min = params.diff_min; + + arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data); +} +} // namespace 
luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h new file mode 100644 index 0000000..fdddaa9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h new file mode 100644 index 0000000..816b7f6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h new file mode 100644 index 0000000..ea57578 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Sub(const tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake new file mode 100644 index 0000000..a68b363 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake @@ -0,0 +1,65 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + nnas_find_package(CMSISSource EXACT 5.8.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + + if (NOT CMSISSource_FOUND) + message(STATUS "Skipping luci-interpreter: CMSISSource not found") + return() + endif () + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE "${PAL}") + target_include_directories(${TGT} PRIVATE + 
"${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c") + list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc) + add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES}) + set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN) + target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC + "${CMSISSource_DIR}/CMSIS/NN/Include" + "${CMSISSource_DIR}/CMSIS/DSP/Include" + "${CMSISSource_DIR}/CMSIS/Core/Include") + + target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal) +endmacro() diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst new file mode 100644 index 0000000..8e20559 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -0,0 +1,77 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchMatMul) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) 
+REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Gather) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LocalResponseNormalization) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(LogSoftmax) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Mean) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(OneHot) +REGISTER_KERNEL(Pack) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(Pow) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Relu) +REGISTER_KERNEL(Relu6) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(ReverseV2) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Slice) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(Split) +REGISTER_KERNEL(SplitV) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(Unpack) +REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h new file mode 100644 index 0000000..21e6329 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h new file mode 100644 index 0000000..cce3060 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool(const tflite::PoolParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h new file mode 100644 index 0000000..3894f2d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H +#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H + +#include + +namespace luci_interpreter_pal +{ +inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data, + const tflite::RuntimeShape &rhs_shape, const float *rhs_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad, + luci_interpreter::Tensor *rhs_scratchpad, + const tflite::RuntimeShape &lhs_shape, + const tflite::RuntimeShape &rhs_shape) +{ + // Scratchpad for transposed LHS + { + auto lhs_rank = lhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(lhs_rank); + for (int i = 0; i < lhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = lhs_shape.Dims(i); + } + scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1); + scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2); + + lhs_scratchpad->resize(scratchpad_size); + } + // Scratchpad for transposed RHS + { + auto rhs_rank = rhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(rhs_rank); + for (int i = 0; i < rhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = rhs_shape.Dims(i); + } + scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank 
- 1); + scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2); + + rhs_scratchpad->resize(scratchpad_size); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h new file mode 100644 index 0000000..3fe2022 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h new file mode 100644 index 0000000..985a15f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + if (scratchpad_data) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + tflite::RuntimeShape im2col_shape{batches, output_height, output_width, + input_depth * filter_height * filter_width}; + + tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + scratchpad_data); + } + else + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + // TODO This should only be done once (although it takes only a few microseconds). 
+ // Also, the user should be able to adjust the number of threads. + auto gemmlowp_context = std::make_unique(); + gemmlowp_context->set_max_num_threads(static_cast(std::thread::hardware_concurrency())); + + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, gemmlowp_context.get()); +} + +static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + // TODO enable optimized version + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + + // Allocate tensor for scratchpad, if needed. + // The checks here should be aligned with the actual implementation. 
+ const bool need_dilated_scratchpad = + params.dilation_height_factor != 1 || params.dilation_width_factor != 1; + const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 || + filter_height != 1 || filter_width != 1; + auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 && + (need_dilated_scratchpad || need_non_dilated_scratchpad); + + if (_need_scratchpad) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + + auto data_type_size = static_cast(luci_interpreter::getDataTypeSize(input_data_type)); + int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height * + filter_width * data_type_size; + luci_interpreter::Shape scratchpad_shape{scratchpad_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h new file mode 100644 index 0000000..f9ebfcf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h new file mode 100644 index 0000000..c9d1a29 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel( + const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams ¶ms, + const luci_interpreter::DataType 
&input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)params; + (void)input_data_type; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h new file mode 100644 index 0000000..3af6d07 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h new file mode 100644 index 0000000..cb365ff --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include + +namespace luci_interpreter_pal +{ +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h new file mode 100644 index 0000000..62970db --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h new file mode 100644 index 0000000..49ac35f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_GATHER_H +#define LUCI_INTERPRETER_PAL_GATHER_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Gather(const tflite::GatherParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_GATHER_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h new file mode 100644 index 0000000..6c663e2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h new file mode 100644 index 0000000..aac57f2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Pool(const tflite::PoolParams ¶ms, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h new file mode 100644 index 0000000..e8209ba --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h new file mode 100644 index 0000000..54f7f09 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H +#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H + +#include + +namespace luci_interpreter_pal +{ +static inline void +LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h new file mode 100644 index 0000000..a32e3ee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H +#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H + +#include + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +static inline void LogSoftmax(const tflite::SoftmaxParams ¶ms, float input_scale, + const tflite::RuntimeShape &input_shape, const uint8 *input_data, + const tflite::RuntimeShape &output_shape, uint8 *output_data) +{ + tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h new file mode 100644 index 0000000..a8a9d4a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} + +template <> +inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const int64_t *input1_data, const tflite::RuntimeShape &input2_shape, + const int64_t *input2_data, const tflite::RuntimeShape &output_shape, + int64_t *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h new file mode 100644 index 0000000..797ffee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h new file mode 100644 index 0000000..bf1d795 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h new file mode 100644 index 0000000..b4c715d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU_H +#define LUCI_INTERPRETER_PAL_RELU_H + +#include + +namespace luci_interpreter_pal +{ +static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data); +} + +template +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h new file mode 100644 index 0000000..bf2f91a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU6_H +#define LUCI_INTERPRETER_PAL_RELU6_H + +#include + +namespace luci_interpreter_pal +{ +static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data); +} + +template +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU6_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h new file mode 100644 index 0000000..7380081 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h new file mode 100644 index 0000000..74d1926 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h new file mode 100644 index 0000000..0ffba14 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, activation_state_data, output_shape, + output_data, scratchpad_data, output_temp_data, scale_1_a, + scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp); +} +static inline void +FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, scratchpad_data, + activation_state_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + 
luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h new file mode 100644 index 0000000..640a716 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SLICE_H +#define LUCI_INTERPRETER_PAL_SLICE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Slice(const tflite::SliceParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SLICE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h new file mode 100644 index 0000000..b197e79 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +template +static inline void Softmax(const tflite::SoftmaxParams &params, + const tflite::RuntimeShape &input_shape, const In *input_data, + const tflite::RuntimeShape &output_shape, Out *output_data) +{ + tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h new file mode 100644 index 0000000..5e8de9b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams &params, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h new file mode 100644 index 0000000..52d2a5b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h new file mode 100644 index 0000000..4d8da72 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPLIT_H +#define LUCI_INTERPRETER_PAL_SPLIT_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape, + const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes, + Scalar *const *output_data) +{ + tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPLIT_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h new file mode 100644 index 0000000..04080d6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Sub(const tflite::ArithmeticParams &params, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake new file mode 100644 index 0000000..185700c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake @@ -0,0 +1,82 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + + find_package(Threads REQUIRED) + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE "${PAL}") + target_include_directories(${TGT} SYSTEM PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + 
target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + # TODO put it back, I changed my mind. + # instead add sources with visitors in this library + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + + if(BUILD_ARM32_NEON) + # NOTE may need to revise this list for version upgrade + set(PAL_SOURCES ${PAL_SOURCES} + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc + ${TensorFlowRuySource_DIR}/ruy/allocator.cc + ${TensorFlowRuySource_DIR}/ruy/block_map.cc + ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc + ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc + ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc + ${TensorFlowRuySource_DIR}/ruy/ctx.cc + ${TensorFlowRuySource_DIR}/ruy/denormal.cc + ${TensorFlowRuySource_DIR}/ruy/frontend.cc + ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc + ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc + ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc + ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc + ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc + ${TensorFlowRuySource_DIR}/ruy/trmul.cc + ${TensorFlowRuySource_DIR}/ruy/tune.cc + ${TensorFlowRuySource_DIR}/ruy/wait.cc + ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc + ) + endif(BUILD_ARM32_NEON) + + add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES}) + set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + target_link_libraries(${TGT} PRIVATE Threads::Threads 
luci_interpreter_linux_pal) +endmacro() diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst new file mode 100644 index 0000000..f0df58d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -0,0 +1,62 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) 
+REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h new file mode 100644 index 0000000..21e6329 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h new file mode 100644 index 0000000..cce3060 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void AveragePool(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git 
a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h new file mode 100644 index 0000000..4dd77ff --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h new file mode 100644 index 0000000..1397687 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h @@ -0,0 +1,85 @@ +/* + * 
Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, 
output_data, scratchpad_shape, + scratchpad_data, nullptr); +} + +static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams ¶ms, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + (void)input_data_type; + (void)params; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h new file mode 100644 index 0000000..8463e57 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h> // NOTE(review): path reconstructed; confirm
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 0000000..c9d1a29
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+// NOTE(review): the three include targets were missing in this copy of the
+// patch; the paths below are reconstructed - confirm against upstream.
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+// Generic fallback: not implemented for the MCU target. It asserts
+// unconditionally and otherwise only marks its parameters as used, so with
+// assertions disabled (NDEBUG) it returns without writing output_data.
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "DepthwiseConvPerChannel NYI");
+    (void)params;
+    (void)output_multiplier;
+    (void)output_shift;
+    (void)input_shape;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+// int8_t specialization: forwards to
+// tflite::reference_integer_ops::DepthwiseConvPerChannel; scratchpad_* unused.
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_integer_ops::DepthwiseConvPerChannel(
+    params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+    bias_shape, bias_data, output_shape, output_data);
+}
+
+// Marks the scratchpad tensor as non-allocatable; every other argument is
+// ignored.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  (void)params;
+  (void)input_data_type;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+
+  scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 0000000..15ff032
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +template +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void Dequantize(tflite::DequantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h new file mode 100644 index 0000000..4089d0a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include + +namespace luci_interpreter_pal +{ + +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h new file mode 100644 index 0000000..048624d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include +#include + +namespace luci_interpreter_pal +{ +template +static inline void FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected(const tflite::FullyConnectedParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h new file mode 100644 index 0000000..f84742a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h new file mode 100644 index 0000000..38a302f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Pool(const tflite::PoolParams ¶ms, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h new file mode 100644 index 0000000..9ccd222 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h new file mode 100644 index 0000000..347a97a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h new file mode 100644 index 0000000..be5903a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h new file mode 100644 index 0000000..6046789 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h new file mode 100644 index 0000000..cc9f0fd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h new file mode 100644 index 0000000..f4d5a6e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> // NOTE(review): path reconstructed; confirm
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+                                               output_size_shape, output_size_data,
+                                               unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 0000000..3bba668
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+// NOTE(review): the include target was missing in this copy of the patch; the
+// path below is reconstructed from the callee's name - confirm against upstream.
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+// Fully-integer SVDF step (int8 input/output, int16 state, int32 scratch).
+//  1. Shifts activation_state_data left by one element.
+//  2. Feature matmul: each (batch, filter) dot product of weight_feature_data
+//     with (input - input_zp) is rescaled with (scale_1_a, scale_1_b), clamped
+//     to the int16_t range and written at stride n_memory, starting at index
+//     n_memory - 1 of the state.
+//  3. Time: every n_memory-long run of the state is multiplied with the
+//     matching weight_time_data run and summed into scratchpad_data.
+//  4. output_temp_data is seeded with bias_data (zeros when bias_data is null),
+//     accumulates n_rank consecutive scratch values per unit, and is rescaled
+//     with (scale_2_a, scale_2_b), offset by output_zp and clamped to int8_t.
+// bias_shape and output_shape are not read.
+// NOTE(review): the numeric_limits / static_cast type arguments were missing in
+// this copy of the patch and are restored from the destination types - confirm.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  (void)bias_shape;
+  (void)output_shape;
+
+  const int n_rank = params.rank;
+  const int n_batch = input_shape.Dims(0);
+  const int n_input = input_shape.Dims(1);
+  const int n_filter = weight_feature_shape.Dims(0);
+  const int n_unit = n_filter / n_rank;
+  const int n_memory = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    int16_t *new_state_start = activation_state_data;
+    const int16_t *old_state_start = activation_state_data + 1;
+    const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Feature matmul.
+  {
+    const int32_t output_max = std::numeric_limits<int16_t>::max();
+    const int32_t output_min = std::numeric_limits<int16_t>::min();
+    int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+    for (int b = 0; b < n_batch; b++)
+    {
+      const int8_t *matrix_ptr = weight_feature_data;
+      for (int r = 0; r < n_filter; r++)
+      {
+        int32_t dot_prod = 0;
+        const int8_t *vector_in_batch = input_data + b * n_input;
+        for (int c = 0; c < n_input; c++)
+        {
+          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+        }
+        dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+        // This assumes state is symmetrically quantized. Otherwise last bit of
+        // state should be initialized to its zero point and accumulate the
+        // dot_prod.
+        // Equivalent as the following:
+        //     result_in_batch = zero point, which happens to be zero.
+        //     result_in_batch += dot_prod_56.
+        *result_in_batch = dot_prod;
+        result_in_batch += n_memory;
+      }
+    }
+  }
+
+  // Time.
+  {
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Perform batched vector dot product:
+      const int16_t *vector1_ptr = weight_time_data;
+      const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+      for (int i = 0; i < n_filter; i++)
+      {
+        *scratch_ptr_batch = 0;
+        for (int j = 0; j < n_memory; j++)
+        {
+          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+        }
+        scratch_ptr_batch++;
+      }
+    }
+  }
+
+  // Reduce, add bias, rescale, activation.
+  {
+    // Add bias.
+    if (bias_data)
+    {
+      // Vector batch assign:
+      for (int i = 0; i < n_batch; ++i)
+      {
+        int32_t *output_ptr = output_temp_data + i * n_unit;
+        const int32_t *bias_ptr = bias_data;
+        for (int j = 0; j < n_unit; ++j)
+        {
+          *output_ptr++ = *bias_ptr++;
+        }
+      }
+    }
+    else
+    {
+      int32_t *output_ptr = output_temp_data;
+      for (int i = 0; i < n_batch * n_unit; ++i)
+      {
+        *output_ptr++ = 0;
+      }
+    }
+
+    // Reduce.
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Reduction sum vector
+      for (int i = 0; i < n_unit; ++i)
+      {
+        for (int j = 0; j < n_rank; ++j)
+        {
+          output_temp_ptr[i] += *scratch_ptr_batch++;
+        }
+      }
+    }
+
+    // Rescale.
+    const int32_t output_max = std::numeric_limits<int8_t>::max();
+    const int32_t output_min = std::numeric_limits<int8_t>::min();
+    for (int i = 0; i < n_batch * n_unit; ++i)
+    {
+      int32_t x1 = output_temp_data[i];
+      int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+      int32_t x3 = x2 + output_zp;
+      int32_t x4 = std::min(std::max(output_min, x3), output_max);
+      output_data[i] = static_cast<int8_t>(x4);
+    }
+  }
+}
+
+// Float SVDF step: shifts activation_state_data left by one element, writes
+// each (batch, filter) dot product of weight_feature_data with input_data at
+// stride memory_size starting at index memory_size - 1 of the state, then
+// hands the time-weight, bias and activation stage to
+// tflite::reference_ops::ApplyTimeWeightsBiasAndActivation.
+// bias_shape and output_shape are not read.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  (void)bias_shape;
+  (void)output_shape;
+
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    float *new_state_start = activation_state_data;
+    const float *old_state_start = activation_state_data + 1;
+    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    const float *matrix = weight_feature_data;
+    const float *vector = input_data;
+    float *result = &activation_state_data[memory_size - 1];
+    float *result_in_batch = result;
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+// Sizes the SVDF scratchpad tensors for the MCU target: scratchpad_1 is resized
+// to {batch_size, num_filters}; scratchpad_2 is resized to {batch_size,
+// num_units} only when input_data_type is S8. Throws std::runtime_error for the
+// hybrid case (FLOAT32 input with S8 or U8 feature weights). scratchpad_3..6,
+// input_shape and weight_time_shape are not used.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Never read here; cast up front so they count as used on every path.
+  (void)input_shape;
+  (void)weight_time_shape;
+  (void)scratchpad_3;
+  (void)scratchpad_4;
+  (void)scratchpad_5;
+  (void)scratchpad_6;
+
+  if (input_data_type == loco::DataType::FLOAT32 &&
+      (weight_feature_data_type == loco::DataType::S8 ||
+       weight_feature_data_type == loco::DataType::U8))
+  {
+    throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == loco::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 0000000..9838b54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template +static inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation doesn't support on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h new file mode 100644 index 0000000..fdddaa9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h new file mode 100644 index 0000000..816b7f6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h new file mode 100644 index 0000000..ea57578 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
# Locates the TensorFlow-family source trees the mcu PAL compiles against and sets
# PAL_INITIALIZED to TRUE when all of them are available.
#
# NOTE: this is a macro, not a function, so every return() below leaves the CMakeLists.txt
# that invoked initialize_pal(), which skips the rest of luci-interpreter.
macro(initialize_pal)
  nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
  nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
  nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
  nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)

  if (NOT TensorFlowSource_FOUND)
    message(STATUS "Skipping luci-interpreter: TensorFlow not found")
    return()
  endif ()

  if (NOT TensorFlowGEMMLowpSource_FOUND)
    message(STATUS "Skipping luci-interpreter: gemmlowp not found")
    return()
  endif ()

  if (NOT TensorFlowEigenSource_FOUND)
    message(STATUS "Skipping luci-interpreter: Eigen not found")
    return()
  endif ()

  if (NOT TensorFlowRuySource_FOUND)
    message(STATUS "Skipping luci-interpreter: Ruy not found")
    return()
  endif ()
  #find_package(Threads REQUIRED)

  set(PAL_INITIALIZED TRUE)
endmacro()

# Adds the PAL include directories to TGT and links it against the static helper library
# luci_interpreter_mcu_pal, which is built from a few TensorFlow Lite sources.
macro(add_pal_to_target TGT)
  target_include_directories(${TGT} PRIVATE "${PAL}")
  target_include_directories(${TGT} PRIVATE
    "${TensorFlowRuySource_DIR}"
    "${TensorFlowGEMMLowpSource_DIR}"
    "${TensorFlowEigenSource_DIR}"
    "${TensorFlowSource_DIR}")
  target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})

  # Create the helper library only once: add_library() fails if a target with the same name
  # already exists, which would make a second add_pal_to_target() call an error.
  if (NOT TARGET luci_interpreter_mcu_pal)
    # TODO (original note, intent unclear): "put it back, I changed my mind.
    # instead add sources with visitors in this library"
    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
      ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
      ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
    add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
    set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_include_directories(luci_interpreter_mcu_pal PRIVATE
      "${TensorFlowRuySource_DIR}"
      "${TensorFlowGEMMLowpSource_DIR}"
      "${TensorFlowEigenSource_DIR}"
      "${TensorFlowSource_DIR}"
    )
  endif ()

  target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
  #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
endmacro()
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/BuddyMemoryManager.h" + +namespace luci_interpreter +{ + +BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize) +{ + int32_t p = lowerLog2(memSize); + + // We assume that the requested size of memory does not exceed 4 GB + assert(p < 32); + memSize = 1 << p; + + _start_block = reinterpret_cast(memory_start); + _start_block->size = memSize - sizeof(Block); + _start_block->is_free = true; + _start_block->self = _start_block; + _num_blocks = 0; + _size = _start_block->size; + + for (auto &_free_block : _free_blocks) + _free_block = nullptr; + + addToBlocks(_start_block, p); +} + +void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + const size_t element_size = getDataTypeSize(tensor.element_type()); + const int32_t num_elements = tensor.shape().num_elements(); + auto size = num_elements * element_size; + auto footprint = size + sizeof(Block); + auto l = (footprint & (footprint - 1)) == 0 + ? 
lowerLog2(footprint) + : lowerLog2(footprint) + 1; // check footprint is pow_of_2 + + while (l < 32 && !_free_blocks[l]) + l++; + + assert(l < 32); + + Block *tmp; + tmp = _free_blocks[l]; + removeFromBlocks(tmp, l); + + while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block)) + { + divideBlock(tmp, l); + l--; + } + + tmp->is_free = false; + tmp->self = tmp; + _num_blocks++; + + auto *data = (uint8_t *)(tmp + 1); + tensor.set_data_buffer(data); +} + +void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + auto data = tensor.data(); + auto *tmp = (Block *)((uint8_t *)data - sizeof(Block)); + + assert(tmp->self == tmp); + + tmp->is_free = true; + addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block))); + + while (tmp) + if (tmp->size == _size) + break; + else + tmp = mergeBlock(tmp); + + _num_blocks--; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp new file mode 100644 index 0000000..29fb767 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/BuddyMemoryManager.h" +#include + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +TEST(BuddyMemoryManager, basic) +{ + auto mem_pool = std::make_unique(200); + auto buddy_memory_manager = std::make_unique(mem_pool.get(), 130); + Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor"); + + buddy_memory_manager->allocate_memory(first_tensor); + + uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + + first_tensor.writeData(data_1, 8); + uint8_t array_1[8]; + first_tensor.readData(array_1, 8); + for (int i = 0; i < 8; i++) + { + EXPECT_EQ(data_1[i], array_1[i]); + } + + Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor"); + buddy_memory_manager->allocate_memory(second_tensor); + + uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}}; + second_tensor.writeData(data_2, 10); + + uint8_t array_2[2][5]; + second_tensor.readData(array_2, 10); + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 5; j++) + { + EXPECT_EQ(data_2[i][j], array_2[i][j]); + } + } + + buddy_memory_manager->release_memory(first_tensor); + EXPECT_EQ(first_tensor.data(), nullptr); + + buddy_memory_manager->release_memory(second_tensor); + EXPECT_EQ(second_tensor.data(), nullptr); +} + +} // namespace +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt new file mode 100644 index 0000000..997b75a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt @@ -0,0 +1,61 @@ +include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake") + +initialize_pal() + +if (NOT PAL_INITIALIZED) + message("PAL Failed to initialize, skip luci-interpreter") + return() +endif() + +message(STATUS "LUCI INTERPRETER BEGIN") + +set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}") 
set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")

# Sub-libraries; each status line marks that the corresponding directory was processed.
add_subdirectory(core)
message(STATUS "LUCI INTERPRETER CORE")
add_subdirectory(kernels)
message(STATUS "LUCI INTERPRETER KERNELS")
add_subdirectory(loader)
message(STATUS "LUCI INTERPRETER LOADER")
add_subdirectory(import)
message(STATUS "LUCI INTERPRETER IMPORT")

message(STATUS "LUCI INTERPRETER INITIALIZED")

set(SOURCES
    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
    Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)

# The top-level interpreter library is shared unless LUCI_INTERPRETER_STATIC is set.
if (NOT LUCI_INTERPRETER_STATIC)
  add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
else ()
  add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
endif ()

set(TEST_SOURCES BuddyMemoryManager.test.cpp)

target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
target_link_libraries(${LUCI_INTERPRETER_BINARY}
  PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
  PRIVATE nncc_common)

install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
        FILES_MATCHING PATTERN "*.h")

# Everything below is test-only.
if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

nnas_find_package(GTest REQUIRED)

GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp new file mode 100644 index 0000000..8cf272e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/Interpreter.h" +#include "luci_interpreter/SimpleMemoryManager.h" + +#include "loader/ModuleLoader.h" + +#include + +namespace luci_interpreter +{ + +namespace +{ + +class EventNotifierImpl final : public EventNotifier +{ +public: + EventNotifierImpl(const RuntimeToIR &runtime_to_ir, + const std::vector &observers) + : _runtime_to_ir(runtime_to_ir), _observers(observers) + { + } + + void postTensorWrite(const Tensor *tensor) override + { + assert(tensor != nullptr); + for (const auto &observer : _observers) + { + observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor); + } + } + + void preOperatorExecute(const Kernel *kernel) override + { + assert(kernel != nullptr); + for (const auto &observer : _observers) + { + observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel)); + } + } + + void postOperatorExecute(const Kernel *kernel) override + { + assert(kernel != nullptr); + for (const auto &observer : _observers) + { + observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel)); + } + } + +private: + 
const RuntimeToIR &_runtime_to_ir; + const std::vector &_observers; +}; + +} // namespace + +Interpreter::Interpreter(const luci::Module *module) +{ + _runtime_to_ir = std::make_unique(); + _event_notifier = std::make_unique(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique(_event_notifier.get()); + + _default_memory_manager = std::make_unique(); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + _default_memory_manager.get()); + loader.load(); +} + +Interpreter::Interpreter(const luci::Module *module, + luci_interpreter::IMemoryManager *memory_manager) +{ + assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead"); + + _runtime_to_ir = std::make_unique(); + _event_notifier = std::make_unique(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique(_event_notifier.get()); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + memory_manager); + loader.load(); +} + +Interpreter::~Interpreter() = default; + +void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data, + size_t data_size) +{ + Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()]; + if (tensor == nullptr) + { + const std::string &name = input_node->name(); + throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\"."); + } + if (data != nullptr) + tensor->writeData(data, data_size); +} + +void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data, + size_t data_size) +{ + Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()]; + if (tensor == nullptr) + { + const std::string &name = output_node->name(); + throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\"."); + } + if (data != nullptr) + tensor->readData(data, data_size); +} + +void Interpreter::interpret() { _runtime_module->execute(); } + +void 
Interpreter::attachObserver(ExecutionObserver *observer) +{ + if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend()) + throw std::runtime_error("Observer is already attached."); + _observers.push_back(observer); +} + +ExecutionObserver::~ExecutionObserver() = default; + +void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {} + +void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {} + +void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp new file mode 100644 index 0000000..230e398 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/SimpleMemoryManager.h" + +namespace luci_interpreter +{ + +void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + tensor.set_data_buffer(data); +} + +void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_data_allocated()) + { + tensor.set_data_buffer(nullptr); + return; + } + auto data = tensor.data(); + delete[] data; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp new file mode 100644 index 0000000..73a8199 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/StaticMemoryManager.h" + +namespace luci_interpreter +{ + +void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + int32_t offset = tensor.get_offset(); + assert(offset >= 0); + auto tensor_ptr = _buffer_ptr + offset; + tensor.set_data_buffer(tensor_ptr); +} + +void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp new file mode 100644 index 0000000..3beeee5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ + +void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + allocations.push_back(data); + tensor.set_data_buffer(data); +} + +void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt new file mode 100644 index 0000000..c2471e0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt @@ -0,0 +1,19 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h" + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h" + EventNotifier.h + Kernel.h + KernelParams.h + RuntimeGraph.h + RuntimeGraph.cpp + RuntimeModule.h + Tensor.cpp) + +add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang) +target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h new file mode 100644 index 0000000..5c4fbd3 --- /dev/null +++ 
namespace luci_interpreter
{

// This header includes nothing, so the two types used below are forward-declared to keep it
// self-contained regardless of include order. Only pointers to them appear here.
class Tensor;
class Kernel;

// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
class EventNotifier
{
public:
  virtual ~EventNotifier() = default;

  // Reports that `tensor` has just been written.
  virtual void postTensorWrite(const Tensor *tensor) = 0;
  // Reports that `kernel` is about to execute.
  virtual void preOperatorExecute(const Kernel *kernel) = 0;
  // Reports that `kernel` has finished executing.
  virtual void postOperatorExecute(const Kernel *kernel) = 0;
};

} // namespace luci_interpreter
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_KERNEL_H +#define LUCI_INTERPRETER_CORE_KERNEL_H + +#include "luci_interpreter/core/Tensor.h" + +#include + +namespace luci_interpreter +{ + +// Base class for all kernels. +class Kernel +{ +protected: + Kernel(std::vector inputs, std::vector outputs) + : _inputs(std::move(inputs)), _outputs(std::move(outputs)) + { + } + +public: + virtual ~Kernel() = default; + + const std::vector &getInputTensors() const { return _inputs; } + const std::vector &getOutputTensors() const { return _outputs; } + + // Configures the kernel. + // This function is currently called once for each kernel during interpreter construction, + // which makes it a convenient place for preparing (resizing) output tensors. + virtual void configure() = 0; + + // Executes the kernel. + virtual void execute() const = 0; + +protected: + // NOTE Prefer not to use these in derived classes. + const std::vector _inputs; + const std::vector _outputs; +}; + +// Base class for kernels with parameters. 
+template class KernelWithParams : public Kernel +{ +protected: + KernelWithParams(std::vector inputs, std::vector outputs, + const Params ¶ms) + : Kernel(std::move(inputs), std::move(outputs)), _params(params) + { + } + +public: + const Params ¶ms() const { return _params; } + +protected: + const Params _params; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_KERNEL_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h new file mode 100644 index 0000000..6c0220c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H +#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H + +#include +#include +#include +#include + +#include +#include + +namespace luci_interpreter +{ + +// Inject commonly used types into `luci_interpreter` namespace for convenience. 
+using Activation = luci::FusedActFunc; +using Padding = luci::Padding; +using MirrorPadMode = luci::MirrorPadMode; + +struct AddParams +{ + Activation activation; +}; + +struct ArgMaxParams +{ + DataType output_type; +}; + +struct BatchMatMulParams +{ + bool adj_x; + bool adj_y; +}; + +struct ConcatenationParams +{ + int axis; + Activation activation; +}; + +struct Conv2DParams +{ + Padding padding; + int32_t stride_height; + int32_t stride_width; + int32_t dilation_height_factor; + int32_t dilation_width_factor; + Activation activation; +}; + +struct DepthToSpaceParams +{ + int block_size; +}; + +struct DepthwiseConv2DParams +{ + Padding padding; + int32_t depth_multiplier; // TODO Remove, as it can be calculated. + int32_t stride_height; + int32_t stride_width; + int32_t dilation_height_factor; + int32_t dilation_width_factor; + Activation activation; +}; + +struct DivParams +{ + Activation activation; +}; + +struct FullyConnectedParams +{ + Activation activation; + bool keep_num_dims = false; +}; + +struct GatherParams +{ + int32_t axis; + int32_t batch_dims; +}; + +struct InstanceNormParams +{ + float epsilon; + Activation activation; +}; + +struct L2NormParams +{ + Activation activation; +}; + +struct LeakyReluParams +{ + float alpha; +}; + +struct LocalResponseNormalizationParams +{ + int32_t radius; + float bias; + float alpha; + float beta; +}; + +struct MirrorPadParams +{ + MirrorPadMode mode; +}; + +struct MulParams +{ + Activation activation; +}; + +struct OneHotParams +{ + int32_t axis; +}; + +struct PackParams +{ + int32_t values_count; + int32_t axis; +}; + +struct Pool2DParams +{ + Padding padding; + int32_t filter_height; + int32_t filter_width; + int32_t stride_height; + int32_t stride_width; + Activation activation; +}; + +struct ReducerParams +{ + bool keep_dims; +}; + +struct ResizeBilinearParams +{ + bool align_corners; + bool half_pixel_centers; +}; + +struct ResizeNearestNeighborParams +{ + bool align_corners; + bool half_pixel_centers; +}; 
+ +struct ShapeParams +{ + loco::DataType out_type; +}; + +struct SubParams +{ + Activation activation; +}; + +struct SVDFParams +{ + bool asymmetric_quantize_inputs; + int32_t svdf_rank; + Activation activation; +}; + +struct SpaceToDepthParams +{ + int block_size; +}; + +struct SoftmaxParams +{ + float beta; +}; + +struct StridedSliceParams +{ + int32_t begin_mask; + int32_t end_mask; + int32_t ellipsis_mask; + int32_t new_axis_mask; + int32_t shrink_axis_mask; +}; + +struct SqueezeParams +{ + std::vector squeeze_dims; +}; + +struct TransposeConvParams +{ + Padding padding; + int32_t stride_height; + int32_t stride_width; +}; + +struct UnpackParams +{ + int axis; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp new file mode 100644 index 0000000..c2f8d2e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "core/RuntimeGraph.h"
+
+#include "core/RuntimeModule.h"
+
+#include <algorithm>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class RuntimeGraph::TensorAllocPlan
+{
+  std::vector<std::vector<Tensor *>> _alloc_plan;
+  std::vector<std::vector<Tensor *>> _dealloc_plan;
+  bool _valid = false;
+  IMemoryManager *_memory_manager;
+
+public:
+  explicit TensorAllocPlan(IMemoryManager *memory_manager);
+  void invalidate() { _valid = false; }
+  bool isValid() const { return _valid; }
+  void build(const RuntimeGraph &graph);
+  void allocate(size_t kernel_index) const;
+  void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+  : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+  invalidate();
+  using Lifetime = std::pair<size_t, size_t>;
+  std::unordered_map<Tensor *, Lifetime> lifetimes;
+  const size_t num_kernels = graph._kernels.size();
+  for (size_t index = 0; index < num_kernels; ++index)
+  {
+    const auto &kernel = graph._kernels[index];
+    for (const Tensor *tensor : kernel->getInputTensors())
+    {
+      auto nc_tensor = const_cast<Tensor *>(tensor);
+      if (lifetimes.count(nc_tensor) > 0)
+        lifetimes.at(nc_tensor).second = index;
+    }
+    for (Tensor *tensor : kernel->getOutputTensors())
+    {
+      assert(lifetimes.count(tensor) == 0);
+      lifetimes[tensor] = Lifetime(index, index);
+    }
+  }
+  for (const Tensor *tensor : graph.getOutputTensors())
+  {
+    auto nc_tensor = const_cast<Tensor *>(tensor);
+    if (lifetimes.count(nc_tensor) > 0)
+      lifetimes.at(nc_tensor).second = num_kernels;
+  }
+  _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+  _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+  for (const auto &item : lifetimes)
+  {
+    _alloc_plan[item.second.first].push_back(item.first);
+    _dealloc_plan[item.second.second].push_back(item.first);
+  }
+  _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+  assert(_valid && kernel_index < _alloc_plan.size());
+  for (Tensor *tensor : _alloc_plan[kernel_index])
+  {
+    _memory_manager->allocate_memory(*tensor);
+  }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+  assert(_valid && kernel_index < _dealloc_plan.size());
+  for (Tensor *tensor : _dealloc_plan[kernel_index])
+  {
+    _memory_manager->release_memory(*tensor);
+  }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+  : _memory_manager(memory_manager), _owning_module(owning_module),
+    _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+  for (auto &tensor : _tensors)
+  {
+    if (tensor->is_data_allocated())
+      _memory_manager->release_memory(*tensor);
+  }
+}
+
+Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
+{
+  assert(tensor != nullptr);
+  _tensors.push_back(std::move(tensor));
+  return _tensors.back().get();
+}
+
+void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
+{
+  assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
+                     [](Tensor *tensor) { return tensor != nullptr; }));
+  _input_tensors = input_tensors;
+}
+
+void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
+{
+  assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
+                     [](Tensor *tensor) { return tensor != nullptr; }));
+  _output_tensors = output_tensors;
+}
+
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+  _memory_manager->allocate_memory(*tensor);
+}
+
+void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
+{
+  assert(kernel != nullptr);
+  _kernels.push_back(std::move(kernel));
+  _tensor_alloc_plan->invalidate();
+}
+
+void RuntimeGraph::execute() const
+{
+  if (!_tensor_alloc_plan->isValid())
+    _tensor_alloc_plan->build(*this);
+
+  EventNotifier *event_notifier = _owning_module->getEventNotifier();
+
+  // Notify the observers that the input tensors have changed.
+  if (event_notifier != nullptr)
+  {
+    for (const Tensor *input_tensor : getInputTensors())
+    {
+      if (input_tensor->is_observable())
+        event_notifier->postTensorWrite(input_tensor);
+    }
+  }
+
+  for (size_t index = 0; index < _kernels.size(); ++index)
+  {
+    const auto &kernel = _kernels[index];
+    if (event_notifier != nullptr)
+    {
+      event_notifier->preOperatorExecute(kernel.get());
+    }
+
+    // TODO The `configure` method should only be called if the outputs of an operator need to be
+    //  resized.
+    kernel->configure();
+
+    // Preallocate outputs in advance instead of relying on automatic allocation
+    _tensor_alloc_plan->allocate(index);
+
+    kernel->execute();
+
+    if (event_notifier != nullptr)
+    {
+      event_notifier->postOperatorExecute(kernel.get());
+    }
+
+    for (const Tensor *tensor : kernel->getOutputTensors())
+    {
+      if (event_notifier != nullptr && tensor->is_observable())
+      {
+        event_notifier->postTensorWrite(tensor);
+      }
+    }
+    _tensor_alloc_plan->deallocate(index);
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 0000000..8184e24
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+#include "core/Kernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+class RuntimeGraph
+{
+private:
+  class TensorAllocPlan;
+  friend class TensorAllocPlan;
+
+public:
+  explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
+  ~RuntimeGraph();
+
+  Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
+
+  void setInputTensors(const std::vector<Tensor *> &input_tensors);
+  void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+
+  void configureAllocations(Tensor *tensor);
+
+  const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
+  const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
+
+  void addKernel(std::unique_ptr<Kernel> &&kernel);
+
+  void execute() const;
+
+private:
+  IMemoryManager *_memory_manager;
+  RuntimeModule *_owning_module;
+  std::vector<std::unique_ptr<Tensor>> _tensors;
+  std::vector<Tensor *> _input_tensors;
+  std::vector<Tensor *> _output_tensors;
+
+  // Kernels in execution order.
+  std::vector<std::unique_ptr<Kernel>> _kernels;
+  // Per-kernel plan of tensors to allocate before and release after each kernel runs
+  std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 0000000..78873b0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule
+{
+public:
+  explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
+
+  EventNotifier *getEventNotifier() const { return _event_notifier; }
+
+  RuntimeGraph *addGraph(IMemoryManager *memory_manager)
+  {
+    _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
+    return _graphs.back().get();
+  }
+
+  const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
+  const std::vector<Tensor *> &getOutputTensors() const
+  {
+    return getMainGraph()->getOutputTensors();
+  }
+
+  void execute() const { getMainGraph()->execute(); }
+
+private:
+  RuntimeGraph *getMainGraph() const { return _graphs[0].get(); } // first added graph
+
+  EventNotifier *const _event_notifier;
+  std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
new file mode 100644
index 0000000..3c3c5ff
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
+               std::string name)
+  : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+    _name(std::move(name)), _data_allocated(false)
+{
+}
+
+void Tensor::readData(void *data_ptr, size_t data_size) const
+{
+  const size_t element_size = getDataTypeSize(element_type());
+  const int32_t num_elements = shape().num_elements();
+  if (data_size != num_elements * element_size) // caller's buffer must match exactly
+  {
+    throw std::invalid_argument("Invalid data size.");
+  }
+  assert(data_ptr != nullptr);
+  std::memcpy(data_ptr, data(), data_size);
+}
+
+void Tensor::writeData(const void *data_ptr, size_t data_size)
+{
+  const size_t element_size = getDataTypeSize(element_type());
+  const int32_t num_elements = shape().num_elements();
+  if (data_size != num_elements * element_size) // caller's buffer must match exactly
+  {
+    throw std::invalid_argument("Invalid data size.");
+  }
+  assert(data_ptr != nullptr);
+  std::memcpy(data(), data_ptr, data_size);
+}
+
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 0000000..dd9733f
---
/dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt @@ -0,0 +1,15 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h" + GraphBuilderRegistry.cpp) + +# include specific builders +file(GLOB_RECURSE NODES "Nodes/*") +list(APPEND SOURCES ${NODES}) + +add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) + +target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import) diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp new file mode 100644 index 0000000..a33bca6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+  auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+  {
+    // redefine NodeBuilder of BUFFER type
+    builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+  }
+
+  return builder;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 0000000..14e90f2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct which describes data loaded to custom_options of CircleReferencingConst node
+struct ConstDataReference
+{
+  const uint8_t *data = nullptr;
+  uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+                                                     GraphBuilderContext *context) const
+{
+  assert(tensor_index >= 0);
+
+  const auto graph = context->graph();
+  const auto reader = context->reader();
+  const auto tensors = reader->tensors();
+  auto const const_tensor = tensors[tensor_index];
+  assert(const_tensor != nullptr);
+  if (const_tensor->is_variable())
+  {
+    // Create CircleVariable for variable
+    return nullptr;
+  }
+
+  auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+  auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+  if (const_dims.empty() && buffer.empty())
+  {
+    // unknown shape tensor and scalar tensor
+    return nullptr;
+  }
+
+  // if tensor_index is used as output to some other operator, this is not a constant
+  auto tensoroutputs = context->tensoroutputs();
+  if (tensoroutputs->find(tensor_index))
+  {
+    // other operator output tensor
+    return nullptr;
+  }
+
+  uint32_t num_elements = 1;
+  for (uint32_t r = 0; r < const_dims.size(); ++r)
+  {
+    num_elements = num_elements * const_dims[r];
+  }
+
+  if (buffer.empty() && num_elements > 0)
+  {
+    // normal empty tensor
+    return nullptr;
+  }
+
+  // create CircleReferencingConst
+  auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+  {
+    custom_node->custom_code("CircleReferencingConst");
+
+    copy_tensor_attributes(const_tensor, custom_node);
+    custom_node->shape_status(luci::ShapeStatus::VALID);
+
+    // custom options stores size of buffer and pointer's value to buffer's data
+    {
+      std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+      {
+        auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+        const_data_ref = {buffer.data(), buffer.size()};
+      }
+      custom_node->custom_options(custom_options);
+    }
+  }
+
+  // Output of CircleCustom node presented with CircleConstNode
+  auto out_node = graph->nodes()->create<CircleCustomOut>();
+  {
+    out_node->index(0);
+    out_node->input(custom_node);
+
+    copy_tensor_attributes(const_tensor, out_node);
+    out_node->shape_status(luci::ShapeStatus::VALID);
+  }
+
+  return out_node;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 0000000..ed8f951
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+  CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 0000000..d7bf308
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+  : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+void Add::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  if (input1()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+                           output()->zero_point() == 0);
+  }
+
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Add::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Add::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                               getTensorShape(input2()), getTensorData<float>(input2()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+template <typename T> void Add::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+void Add::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const int left_shift = 20;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+  params.left_shift = left_shift;
+  // The kernel expects inputs' zero points to be negated.
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input1_multiplier = input1_multiplier;
+  params.input1_shift = input1_shift;
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.input2_multiplier = input2_multiplier;
+  params.input2_shift = input2_shift;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+void Add::evalQuantizedS16() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  constexpr int left_shift = 12;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  auto fn = [input1_multiplier, input1_shift, //
+             input2_multiplier, input2_shift, //
+             output_multiplier, output_shift, //
+             activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+    const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+    const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+    const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input1_val, input1_multiplier, input1_shift);
+    const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input2_val, input2_multiplier, input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      raw_sum, output_multiplier, output_shift);
+    const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+    return static_cast<int16_t>(clamped_output);
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
+                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.h b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
new file mode 100644
index 0000000..91d95b6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ADD_H
+#define LUCI_INTERPRETER_KERNELS_ADD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Add : public KernelWithParams<AddParams>
+{
+public:
+  Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params);
+
+  const Tensor *input1() const { return _inputs[0]; }
+  const Tensor *input2() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+  void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ADD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 0000000..b8b1c30
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Add.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class AddTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST_F(AddTest, Uint8) +{ + std::initializer_list base_shape = {2, 3, 1, 2}; + std::initializer_list base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::initializer_list test_shapes[] = { + {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::initializer_list test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::initializer_list output_shapes[] = { + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + std::vector> output_data = { + {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f, + -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f}, + {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f}, + {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f, + -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, 
-1.7f, 1.7f, -1.2f, 1.6f, -1.3f}, + {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}}; + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + std::pair quant_param = quantizationParams(-3.f, 3.f); + for (int i = 0; i < output_data.size(); i++) + { + Tensor input1_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + // Re-run with exchanged inputs. 
+ for (int i = 0; i < output_data.size(); i++) + { + Tensor input1_tensor = makeInputTensor( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST_F(AddTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> test_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + std::vector input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + 
makeInputTensor(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +template void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> test_outputs = { + {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1, + 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7}, + {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7}, + {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0, + 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7}, + {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}}; + std::vector input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = 
makeInputTensor(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(AddTest, SInt32) +{ + CheckInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt64) +{ + CheckInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector> ref_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 
0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(AddTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::U64); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(AddTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp new file mode 100644 index 0000000..6561a17 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ArgMax.h" +#include "kernels/Utils.h" +#include "PALArgMax.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params) + : KernelWithParams({input, axis}, {output}, params) +{ +} + +void ArgMax::configure() +{ + assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64); + assert(input()->shape().num_dims() >= 1); + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + Shape output_shape(num_dims - 1); + + // If axis value is negative, then update by adding input_shape's num_dims. + // If updated value also negative, then assert. + assert(axis()->shape().num_elements() == 1); + int axis_value = getTensorData(axis())[0]; + if (axis_value < 0) + axis_value = axis_value + num_dims; + assert(axis_value >= 0); + + int j = 0; + for (int i = 0; i < num_dims; i++) + { + if (i == axis_value) + continue; + output_shape.dim(j++) = input_shape.dim(i); + } + + assert(output()->element_type() == _params.output_type); + + output()->resize(output_shape); +} + +void ArgMax::execute() const +{ + +#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ + luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData(input()), \ + getTensorData(axis()), getTensorShape(output()), \ + getTensorData(output()), std::greater()) + if (axis()->element_type() == DataType::S32) + { + switch (_params.output_type) + { + case DataType::S32: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int32_t, int32_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + case DataType::S64: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int32_t, int64_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, 
int32_t, int64_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + default: + throw std::runtime_error("Unsupported output type."); + } + } + else + { + switch (_params.output_type) + { + case DataType::S32: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int64_t, int32_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + case DataType::S64: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int64_t, int64_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + default: + throw std::runtime_error("Unsupported output type."); + } + } +#undef TF_LITE_ARG_MAX +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h new file mode 100644 index 0000000..c851b58 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H +#define LUCI_INTERPRETER_KERNELS_ARGMAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ArgMax : public KernelWithParams +{ +public: + ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axis() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp new file mode 100644 index 0000000..474f4b3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ArgMax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, + std::initializer_list dimension_shape, + std::initializer_list output_shape, std::initializer_list input_data, + std::initializer_list dimension_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor dimension_tensor = + makeInputTensor(dimension_shape, dimension_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(getElementType()); + + ArgMaxParams params{}; + params.output_type = getElementType(); + ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template class ArgMaxTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(ArgMaxTest, DataTypes); + +TYPED_TEST(ArgMaxTest, Simple) +{ + Check(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{}, + /*output_shape=*/{1, 1, 1}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*dimension_data=*/{3}, /*output_data=*/{1}); + Check(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{}, + /*output_shape=*/{1, 1, 1}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*dimension_data=*/{3}, /*output_data=*/{1}); +} + +TYPED_TEST(ArgMaxTest, MultiDimensions) +{ + Check(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{}, + /*output_shape=*/{1, 1, 2}, + /*input_data=*/ + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + /*dimension_data=*/{3}, 
/*output_data=*/{3, 1}); + Check(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{}, + /*output_shape=*/{1, 1, 2}, + /*input_data=*/ + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + /*dimension_data=*/{3}, /*output_data=*/{3, 1}); +} + +TEST(ArgMaxTest, UnsupportedType_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); + Tensor dimension_tensor = makeInputTensor({}, {3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + ArgMaxParams params{}; + params.output_type = DataType::U8; + ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp new file mode 100644 index 0000000..d3bade9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/AveragePool2D.h" + +#include "kernels/Utils.h" + +#include "PALAveragePool2d.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams &params) + : KernelWithParams({input}, {output, scratchpad}, params) +{ +} + +void AveragePool2D::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input Tensor and Output Tensor Type must be same"); + } + if (input()->shape().num_dims() != 4) + { + throw std::runtime_error("Input Tensor Shape must be 4-D"); + } + const Shape &input_shape = input()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); + + _padding_height = + computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); + _padding_width = + computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + else if (input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + 
LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + output()->resize({batches, output_height, output_width, depth}); + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), + getTensorShape(input()), getTensorShape(output())); +} + +void AveragePool2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalSInt16(); + break; + case DataType::S8: + evalSInt8(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void AveragePool2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +void AveragePool2D::evalQuantized() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + 
params.quantized_activation_max = activation_max; + + tflite::reference_ops::AveragePool(params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); +} + +void AveragePool2D::evalSInt8() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data(); + + luci_interpreter_pal::AveragePool( + params, getTensorShape(input()), getTensorData(input()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void AveragePool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::AveragePool( + params, getTensorShape(input()), getTensorData(input()), // + getTensorShape(output()), getTensorData(output())); +} + +} // namespace 
kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h new file mode 100644 index 0000000..2c8fe16 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class AveragePool2D : public KernelWithParams +{ +public: + AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalSInt16() const; + void evalSInt8() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp new 
file mode 100644 index 0000000..478bfa6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/AveragePool2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class AveragePool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(AveragePool2DTest, Float) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); 
+ + std::vector ref_output_data{ + 0, 1.5, // + 4.5, 6, // + }; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); +} + +TEST_F(AveragePool2DTest, Uint8_0) +{ + std::vector input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; + std::pair quant_param = quantizationParams(-15.9375f, 15.9375f); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); +} + +TEST_F(AveragePool2DTest, Uint8_1) +{ + std::vector input_data{ + 0, 6, 12, 4, // + 3, 2, 10, 7, // + }; + + std::pair quant_param = quantizationParams(-15.9375f, 15.9375f); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + 
AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); +} + +TEST_F(AveragePool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector ref_output_shape{1, 2, 2, 1}; + std::vector input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + std::vector ref_output_data{ + 0, 1.5, // + 4.5, 6, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(AveragePool2DTest, SInt8) +{ + Shape input_shape{1, 4, 5, 1}; + std::vector ref_output_shape{1, 2, 2, 1}; + std::vector input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5, + 7, -6, -1, 9, -2, 0, -5, 11, -1, -7}; + std::vector ref_output_data{ + 0, 2.5, // + 1, 1.5, // + }; + + std::pair quant_param = quantizationParams(-15.9375f, 15.9375f); + Tensor input_tensor = makeInputTensor( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor 
output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG) +{ + Shape input_shape{1, 3, 5}; + std::vector input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AveragePool2DTest, In_Out_Type_NEG) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + 
params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AveragePool2DTest, Quant_Param_NEG) +{ + std::vector input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; + + std::pair quant_param1 = quantizationParams(-15.9375f, 15.9375f); + std::pair quant_param2 = quantizationParams(-7.875f, 7.875f); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp new file mode 100644 index 0000000..24ca229 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/BatchMatMul.h" +#include "kernels/Utils.h" + +#include "PALBatchMatMul.h" + +#include + +#include + +namespace +{ + +tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape) +{ + tflite::RuntimeShape swapped_shape(shape); + const int32_t dims = shape.DimensionsCount(); + swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); + swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); + return swapped_shape; +} + +} // namespace + +namespace luci_interpreter +{ +namespace kernels +{ + +BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, + Tensor *y_tmp, const BatchMatMulParams &params) + : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params) +{ +} + +void BatchMatMul::configure() +{ + auto lhs = x(); + auto rhs = y(); + auto adj_x = params().adj_x; + auto adj_y = params().adj_y; + + // TODO Support non-float types + if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32) + throw std::runtime_error("Unsupported type."); + + LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type()); + + auto lhs_rank = lhs->shape().num_dims(); + auto rhs_rank = rhs->shape().num_dims(); + LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4); + LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4); + + auto lhs_scratchpad = temp_lhs(); + auto rhs_scratchpad = temp_rhs(); + luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs), + getTensorShape(rhs)); + + auto output_rank = std::max(lhs_rank, rhs_rank); + +
auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs)); + auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs)); + + // Ensure any batch dimensions obey broadcasting rules. + for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + if (lhs_dim != rhs_dim) + { + if (lhs_dim != 1) + { + LUCI_INTERPRETER_CHECK(rhs_dim == 1); + } + } + } + + // Ensure other dimensions work for matrix multiplication. + int accum_dim_lhs = + adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1); + int accum_dim_rhs = + adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2); + LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs); + + Shape output_shape(output_rank); + // Fill in any broadcast dimensions. + for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + int broadcast_dim = lhs_dim; + if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) + { + broadcast_dim = rhs_dim; + } + output_shape.dim(i) = broadcast_dim; + } + // Fill in the matmul dimensions. + int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; + int rhs_cols_index = adj_y ?
output_rank - 2 : output_rank - 1; + + output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index); + output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index); + + output()->resize(output_shape); +} + +void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out) +{ + tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in)); + tflite::RuntimeShape shape(getTensorShape(tensor_in)); + tflite::TransposeParams params; + int rank = shape.DimensionsCount(); + params.perm_count = rank; + for (int i = 0; i < rank - 2; ++i) + { + params.perm[i] = i; + } + // Transpose the last two dimensions. + params.perm[rank - 2] = rank - 1; + params.perm[rank - 1] = rank - 2; + transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); + transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); + switch (tensor_in->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, shape, getTensorData(tensor_in), + transposed_shape, getTensorData(tensor_out)); + break; + default: + throw std::runtime_error("Only suppport fp32 BatchMatMul for now."); + } +} + +void BatchMatMul::execute() const +{ + auto lhs = x(); + auto rhs = y(); + + bool adj_x = params().adj_x; + bool adj_y = params().adj_y; + + auto orig_lhs_shape = getTensorShape(lhs); + auto orig_rhs_shape = getTensorShape(rhs); + + auto rhs_tensor = adj_y ? rhs : temp_rhs(); + auto lhs_tensor = adj_x ? temp_lhs() : lhs; + if (not adj_y) + { + TransposeRowsColumns(rhs, temp_rhs()); + } + if (adj_x) + { + TransposeRowsColumns(lhs, temp_lhs()); + } + tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); + tflite::RuntimeShape lhs_shape = adj_x ? 
orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); + + switch (x()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData(rhs_tensor), lhs_shape, + getTensorData(lhs_tensor), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h new file mode 100644 index 0000000..744f497 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H +#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class BatchMatMul : public KernelWithParams +{ +public: + BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp, + const BatchMatMulParams &params); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + Tensor *temp_lhs() const { return _outputs[1]; } + Tensor *temp_rhs() const { return _outputs[2]; } +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp new file mode 100644 index 0000000..edfa3a6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "kernels/BatchMatMul.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class BatchMatMulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(BatchMatMulTest, Float) +{ + std::vector lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint) +{ + std::vector lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18}; + Tensor lhs_tensor = + makeInputTensor({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 4, 3}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); 
+ + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = true; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint) +{ + std::vector lhs_data = {1, 4, 2, 5, 3, 6}; + std::vector rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor({1, 3, 2}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = true; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_BatchSizeTwo) +{ + std::vector lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor({2, 2, 3}, lhs_data, 
_memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({2, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632., + 767., 800., 833., 866.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_DiffBatch) +{ + std::vector lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor({2, 1, 6}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 6, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 
1210.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4})); +} + +TEST_F(BatchMatMulTest, Invalid_Shape_NEG) +{ + Tensor lhs_tensor = + makeInputTensor({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Batch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank_NEG) +{ + Tensor lhs_tensor = makeInputTensor({4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank2_NEG) +{ + Tensor lhs_tensor = + makeInputTensor({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, TypeMisMatch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::U8, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp new file mode 100644 index 0000000..bd315ff --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/BatchToSpaceND.h" +#include "kernels/Utils.h" + +#include "PALBatchToSpaceND.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +namespace +{ +const int kInputMinDimensionNum = 3; +const int kInputMaxDimensionNum = 4; +} // namespace + +BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops, + Tensor *output) + : Kernel({input, block_shape, crops}, {output}) +{ +} + +void BatchToSpaceND::configure() +{ + + const auto *block_shape_data = block_shape()->data(); + const auto *crops_data = crops()->data(); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int spatial_dims_num = input()->shape().num_dims() - 2; + + LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num); + + LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num); + LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2); + for (int i = 0; i < spatial_dims_num * 2; ++i) + { + LUCI_INTERPRETER_CHECK(crops_data[i] >= 0); + } + + Shape output_shape = Shape(input()->shape().num_dims()); + int output_batch_size = input()->shape().dim(0); + for (int i = 0; i < spatial_dims_num; ++i) + { + LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0); + 
output_batch_size = output_batch_size / block_shape_data[i]; + output_shape.dim(i + 1) = + input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1]; + } + + output_shape.dim(0) = output_batch_size; + output_shape.dim(input()->shape().num_dims() - 1) = + input()->shape().dim(input()->shape().num_dims() - 1); + output()->resize(output_shape); +} + +void BatchToSpaceND::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchToSpaceND( + getTensorShape(input()), getTensorData(input()), getTensorShape(block_shape()), + getTensorData(block_shape()), getTensorShape(crops()), + getTensorData(crops()), getTensorShape(output()), getTensorData(output())); + break; + case DataType::U8: + luci_interpreter_pal::BatchToSpaceND( + getTensorShape(input()), getTensorData(input()), getTensorShape(block_shape()), + getTensorData(block_shape()), getTensorShape(crops()), + getTensorData(crops()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h new file mode 100644 index 0000000..57703ea --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class BatchToSpaceND : public Kernel +{ +public: + BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops, + Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *block_shape() const { return _inputs[1]; } + const Tensor *crops() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp new file mode 100644 index 0000000..52647a7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/BatchToSpaceND.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, + std::initializer_list block_shape_shape, + std::initializer_list crops_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list block_shape_data, + std::initializer_list crops_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor crops_tensor = + makeInputTensor(crops_shape, crops_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template class BatchToSpaceNDTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes); + +TYPED_TEST(BatchToSpaceNDTest, Simple) +{ + 
Check(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2}, + /*output_shape=*/{1, 4, 4, 1}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0}, + /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}); +} + +TEST(BatchToSpaceNDTest, Invalid_Shape_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor( + {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor({2, 2}, {0, 0, 0, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(BatchToSpaceNDTest, Invalid_Crops_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor( + {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor({2, 2}, {0, 0, -1, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h new file mode 100644 index 0000000..2d2842a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H +#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H + +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0). 
+template +void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape, + const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, + const T *input2_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data, + Op op) +{ + if (unextended_input1_shape == unextended_input2_shape) + { + const int flat_size = tflite::MatchingElementsSize( + unextended_input1_shape, unextended_input2_shape, unextended_output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = op(input1_data[i], input2_data[i]); + } + } + else + { + assert(unextended_input1_shape.DimensionsCount() <= N); + assert(unextended_input2_shape.DimensionsCount() <= N); + assert(unextended_output_shape.DimensionsCount() <= N); + + tflite::NdArrayDesc desc1{}; + tflite::NdArrayDesc desc2{}; + tflite::NdArrayDesc output_desc{}; + tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, + &desc1, &desc2); + tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + auto fn = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + op(input1_data[SubscriptToIndex(desc1, indexes)], + input2_data[SubscriptToIndex(desc2, indexes)]); + }; + tflite::NDOpsHelper(output_desc, fn); + } +} + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt new file mode 100644 index 0000000..9f4ba0e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt @@ -0,0 +1,43 @@ +set(SOURCES + BinaryOpCommon.h + Utils.h + Utils.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" + ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" + 
${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp) + +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "${NODE}.h") + list(APPEND SOURCES "${NODE}.cpp") +endmacro(REGISTER_KERNEL) + +include(${KERNEL_REGISTER_FILE}) + +add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) + +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common) + +add_pal_to_target(${LUCI_INTERPRETER_KERNELS}) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +macro(REGISTER_KERNEL NODE) + list(APPEND TEST_SOURCES "${NODE}.test.cpp") +endmacro(REGISTER_KERNEL) + +include(${KERNEL_REGISTER_FILE}) + +list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp) + +GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS}) diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp new file mode 100644 index 0000000..39ee725 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Cast.h" +#include "kernels/Utils.h" + +namespace +{ + +using namespace luci_interpreter; +using namespace luci_interpreter::kernels; + +template +void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count) +{ + std::transform(in_data, in_data + elements_count, out_data, + [](InT a) { return static_cast(a); }); +} + +template void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor) +{ + auto const out_type = out_tensor->element_type(); + auto const elements_count = out_tensor->shape().num_elements(); + + switch (out_type) + { + case loco::DataType::U8: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::U16: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::U32: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::U64: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::S8: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::S16: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::S32: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::S64: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::FLOAT32: + cast_data(in_data, getTensorData(out_tensor), elements_count); + break; + case loco::DataType::BOOL: + cast_data(in_data, 
getTensorData(out_tensor), elements_count); + break; + default: + throw std::runtime_error("Unsupported output type."); + } +} + +void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor) +{ + auto in_type = in_tensor->element_type(); + + switch (in_type) + { + case loco::DataType::U8: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::U16: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::U32: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::U64: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::S8: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::S16: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::S32: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::S64: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::FLOAT32: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + case loco::DataType::BOOL: + cast_from_pointer_to_tensor(getTensorData(in_tensor), out_tensor); + break; + default: + throw std::runtime_error("Unsupported input type."); + } +} + +} // namespace + +namespace luci_interpreter +{ +namespace kernels +{ + +Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Cast::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown); + LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown); + + const Shape &shape = input()->shape(); + output()->resize(shape); +} + +void Cast::execute() const +{ + assert(input()->shape().num_elements() == output()->shape().num_elements()); + + cast_from_tensor_to_tensor(input(), output()); +} + +} 
// namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h new file mode 100644 index 0000000..f0bd020 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_CAST_H +#define LUCI_INTERPRETER_KERNELS_CAST_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Cast : public Kernel +{ +public: + Cast(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CAST_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp new file mode 100644 index 0000000..4713ad3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Cast.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list shape, std::initializer_list input_data, + std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType input_type = getElementType(); + constexpr DataType output_type = getElementType(); + + Tensor input_tensor = makeInputTensor(shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + Cast kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), shape); +} + +template +void CheckBoolTo(std::initializer_list shape, std::initializer_list input_data, + std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType input_type = loco::DataType::BOOL; + constexpr DataType output_type = getElementType(); + std::vector::Type> input_data_converted; + for (auto elem : input_data) + { + input_data_converted.push_back(elem); + } + + Tensor input_tensor = + makeInputTensor(shape, 
input_data_converted, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + Cast kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), shape); +} + +template class CastTest : public ::testing::Test +{ +}; + +using IntDataTypes = + ::testing::Types; +TYPED_TEST_SUITE(CastTest, IntDataTypes); + +TYPED_TEST(CastTest, FloatToInt) +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 9.0f, 7.0f, 3.0f, // + }, + /*output_data=*/ + { + 1, 9, 7, 3, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToFloat) +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1.0f, 9.0f, 7.0f, 3.0f, // + }); + SUCCEED(); +} + +template void check_int() +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1, 9, 7, 3, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToInt) +{ + check_int(); + check_int(); + check_int(); + check_int(); + check_int(); + check_int(); + check_int(); + check_int(); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToBool) +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 0, 7, 0, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, BoolToInt) +{ + CheckBoolTo(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + /*output_data=*/ + { + 1, 0, 0, 1, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToBool) +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TEST(CastTest, BoolToFloat) +{ + CheckBoolTo(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + /*output_data=*/ + { + 
1.0f, 0.0f, 0.0f, 1.0f, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToFloat) +{ + Check(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }); + SUCCEED(); +} + +TEST(CastTest, BoolToBool) +{ + CheckBoolTo(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, true, false, false, // + }, + /*output_data=*/ + { + true, true, false, false, // + }); + SUCCEED(); +} + +TEST(CastTest, UnsupportedType_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::Unknown); + + Cast kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); + SUCCEED(); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp new file mode 100644 index 0000000..46ee594 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Concatenation.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Concatenation::Concatenation(std::vector inputs, Tensor *output, + const ConcatenationParams &params) + : KernelWithParams(std::move(inputs), {output}, params) +{ +} + +void Concatenation::configure() +{ + const int num_inputs = _inputs.size(); + LUCI_INTERPRETER_CHECK(num_inputs > 0); + const Tensor *t0 = _inputs[0]; + + // TODO: Support concat with fused activation function + LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE); + + int axis = _params.axis; + if (axis < 0) + axis += t0->shape().num_dims(); + LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims()); + + int32_t sum_axis = t0->shape().dim(axis); + for (int i = 1; i < num_inputs; ++i) + { + const Tensor *tensor = _inputs[i]; + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); + for (int d = 0; d < t0->shape().num_dims(); ++d) + { + if (d == axis) + { + sum_axis += tensor->shape().dim(axis); + } + else + { + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); + } + } + } + + Shape output_shape = t0->shape(); + output_shape.dim(axis) = sum_axis; + + // If input tensors are INT8 type then quantization parameters of all input tensors and the output + // should be the same + for (auto current_tensor : _inputs) + { + if (current_tensor->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() == + output()->quantized_dimension()); + + LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() == + current_tensor->scales().size()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points()); + LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales()); + } + } + output()->resize(output_shape); +} + +void Concatenation::execute() const +{ + 
switch (_inputs[0]->element_type()) + { + case DataType::FLOAT32: + evalGeneric(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S8: + evalGeneric(); + break; + case DataType::S32: + evalGeneric(); + break; + case DataType::S64: + evalGeneric(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template void Concatenation::evalGeneric() const +{ + int axis = _params.axis; + if (axis < 0) + axis += output()->shape().num_dims(); + + VectorOfTensors inputs(_inputs); + tflite::ConcatenationParams params{}; + params.axis = axis; + params.inputs_count = _inputs.size(); + tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(), + getTensorShape(output()), getTensorData(output())); +} + +void Concatenation::evalQuantized() const +{ + int axis = _params.axis; + if (axis < 0) + axis += output()->shape().num_dims(); + + VectorOfQuantizedTensors inputs(_inputs); + tflite::ConcatenationParams params{}; + params.axis = axis; + params.input_zeropoint = inputs.zero_point(); + params.input_scale = inputs.scale(); + params.inputs_count = _inputs.size(); + params.output_zeropoint = output()->zero_point(); + params.output_scale = output()->scale(); + + tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(), + getTensorShape(output()), + getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h new file mode 100644 index 0000000..b48c8ed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H +#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Concatenation : public KernelWithParams +{ +public: + Concatenation(std::vector inputs, Tensor *output, + const ConcatenationParams &params); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void evalGeneric() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp new file mode 100644 index 0000000..f893b38 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Concatenation.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ConcatenationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(ConcatenationTest, Float) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + // Try different 'axis' and expect different results. + { + params.axis = 0; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + for (auto t : kernel.getOutputTensors()) + { + _memory_manager->allocate_memory(*t); + } + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + } + { + params.axis = -2; // Same as '0'. 
+ params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + } + { + params.axis = 1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); + } + { + params.axis = -1; // Same as '1'. + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); + } +} + +TEST_F(ConcatenationTest, Input_Number_Check_NEG) +{ + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Invalid_Axis_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -3; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({1, 2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({3, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG) +{ + 
std::vector input1_data{1, 2, 3, 4}; + std::vector input2_data{5, 6, 7, 8}; + Tensor input1_tensor = makeInputTensor({2, 2}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({2, 2}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + int quantized_dimension = 3; + std::vector scales{0.1, 0.2, 0.3}; + std::vector zero_points{1, -1, 1}; + + Tensor input1_tensor = makeInputTensor( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG) +{ + std::vector input1_data{1, 2, 3, 4}; + std::vector input2_data{5, 6, 7, 8}; + float scale = 0.1; + int32_t zero_point_1 = 1; + int32_t zero_point_2 = -1; + + Tensor input1_tensor = + makeInputTensor({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1); + ConcatenationParams params{}; + + params.axis = -1; + 
params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +// TODO: Remove this test when concat w/ fused_activation is supported +TEST_F(ConcatenationTest, With_Fused_Activation_NEG) +{ + std::vector input1_data{1, 2, 3, 4, 5, 6}; + std::vector input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = 1; + params.activation = luci::FusedActFunc::RELU; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp new file mode 100644 index 0000000..234f954 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Conv2D.h" + +#include "kernels/Utils.h" + +#include "PALConv2d.h" + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const Conv2DParams ¶ms) + : KernelWithParams({input, filter, bias}, {output, scratchpad}, params) +{ +} + +void Conv2D::configure() +{ + // TensorFlow Lite (as of v2.2.0) supports the following combinations of types: + // | input filter bias output | + // ----+---------------------------+ + // (1) | float float float float | + // (2) | float int8 float float | hybrid + // (3) | uint8 uint8 int32 uint8 | quantized + // (4) | int8 int8 int32 int8 | quantized per channel + // + // We only support (1), (3) and (4) for now, and additionally the following: + // | input filter bias output | + // ----+---------------------------+ + // (5) | int16 int16 int64 int16 | + // + if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + } + else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + } + else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); + } + else + { + 
throw std::runtime_error("Unsupported type."); + } + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3)); + + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + bias()->shape().dim(0) == output_depth)); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, + _params.dilation_height_factor); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, + _params.dilation_width_factor); + + _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor, + input_height, filter_height, output_height); + _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width, + filter_width, output_width); + + output()->resize({batches, output_height, output_width, output_depth}); + + // Allocate tensor for scratchpad, if needed. 
+ tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); + + switch (_params.activation) + { + case Activation::NONE: + case Activation::RELU: + case Activation::RELU6: + case Activation::RELU_N1_TO_1: + break; + default: + throw std::runtime_error("Unsupported fused activation"); + } +} + +void Conv2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + if (filter()->element_type() == DataType::FLOAT32) + { + evalFloat(); + break; + } + throw std::runtime_error("Unsupported type."); + case DataType::U8: + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } + break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Conv2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = 
_params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + float *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data(); + + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), + getTensorShape(output()), getTensorData(output()), + getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantized() const +{ + const auto input_scale = static_cast(input()->scale()); + const auto filter_scale = static_cast(filter()->scale()); + const auto output_scale = static_cast(output()->scale()); + + const double real_multiplier = input_scale * filter_scale / output_scale; + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = -filter()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), + getTensorShape(output()), getTensorData(output()), + getTensorShape(scratchpad), getTensorData(scratchpad)); +} + +void Conv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); 
+ + const std::vector multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper quant_multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int32_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast(input_val - input()->zero_point()) * + static_cast(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void Conv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + 
params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data(); + + luci_interpreter_pal::ConvPerChannel( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData(input()), getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantizedS16() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + const 
Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int64_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && 
in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast(input_val) * static_cast(filter_val); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, multipliers[out_c].multiplier, multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h new file mode 100644 index 0000000..330bf3a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H +#define LUCI_INTERPRETER_KERNELS_CONV2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class Conv2D : public KernelWithParams +{ +public: + Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const Conv2DParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *bias() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp new file mode 100644 index 0000000..0fe6ef7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -0,0 +1,707 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Conv2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class Conv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(Conv2DTest, Float) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 + }; + std::vector ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatPointwise) +{ + Shape input_shape{1, 2, 2, 2}; + Shape filter_shape{2, 1, 1, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, // row = 0, col = 0 + 3, 4, // row = 0, col = 1 + 5, 6, // row = 1, col = 0 + 7, 8, // row = 1, col = 1 + }; + std::vector filter_data{ + -1, 2, // out = 0 + -3, 4, // out = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + 4, 7, 6, 9, // row = 0 + 8, 11, 10, 13, // row = 1 + }; + std::vector ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatCheck) +{ + Shape input_shape{2, 2, 4, 1}; + Shape filter_shape{3, 2, 2, 1}; + Shape bias_shape{3}; + std::vector input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector filter_data{ + 1, 2, 3, 4, // first 2x2 filter 
+ -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector bias_data{1, 2, 3}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, Uint8) +{ + std::vector input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector bias_data{1, 2, 3}; + + std::pair input_quant_param = quantizationParams(-63.5, 64); + std::pair output_quant_param = quantizationParams(-127, 128); + + Tensor input_tensor = + makeInputTensor({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, 
_memory_manager.get()); + Tensor filter_tensor = + makeInputTensor({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor( + {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, Uint8_CWQ) +{ + const int output_channels = 3; + std::vector input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair input_quant_param = quantizationParams(0, 4); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + 
filter_quant_params.push_back(quantizationParams(0, 4)); + filter_quant_params.push_back(quantizationParams(-1, 1)); + filter_quant_params.push_back(quantizationParams(-1, 1)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + 
+TEST_F(Conv2DTest, SInt8_CWQ) +{ + const int output_channels = 3; + std::vector input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair input_quant_param = quantizationParams(0, 4); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + filter_quant_params.push_back(std::pair(0.5, 0)); + filter_quant_params.push_back(std::pair(0.25, 0)); + filter_quant_params.push_back(std::pair(0.125, 0)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::S8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, 
&bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, SInt16) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector ref_output_shape{1, 2, 2, 2}; + + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + std::vector ref_output_data{ + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 + }; + + Tensor input_tensor = + makeInputTensor(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(Conv2DTest, SInt16_CWQ_weights) +{ + Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C + Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels + Shape bias_shape{3}; + std::vector ref_output_shape{1, 2, 2, 3}; + + std::vector input_data{ + 1, 2, // row = 0, col 0 + 3, 4, // row = 0, col 1 + 5, 6, // row = 1, col 0 + 7, 8, // row = 1, col 1 + }; + std::vector filter_data{ + 4, -3, // out = 0 + 1, -3, // out = 1 + 5, -3, // out = 2 + }; + std::vector bias_data{1, 10, 5}; + std::vector ref_output_data{ + 0, 5, 4, // row 0, col 0 + 1, 1, 8, // row 0, col 1 + 3, 0, 12, // row 1, col 0 + 5, 0, 16, // row 1, col 1 + }; + + float input_scale = 0.25f; + float output_scale = 0.05f; + std::vector filter_scales = {0.25f, 0.2f, 0.1f}; + std::vector bias_scales; + for (int i = 0; i < filter_scales.size(); ++i) + bias_scales.push_back(filter_scales[i] * input_scale); + std::vector zerop = {0, 0, 0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, zerop, 0, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, 
&im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Bias_Type_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + 
-5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Bias_Data_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{3}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2, 3}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + 
params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Input_Shape_NEG) +{ + Shape input_shape{1, 4, 6, 1}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = 
+ makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::TANH; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp new file mode 100644 index 0000000..3a9acd1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the kernel with one input tensor and one output tensor.
+// `params` supplies the block size read by configure() and execute().
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+// Validates the operands and resizes the output tensor.
+//
+// Requirements (a failed LUCI_INTERPRETER_CHECK aborts configuration):
+//  - the input is 4-dimensional (dims 1, 2 and 3 are read as height, width and channels);
+//  - the output element type is FLOAT32 or U8 and equals the input element type;
+//  - block_size is positive;
+//  - the input channel count is a multiple of block_size * block_size.
+//
+// The output shape is {batch, height * block_size, width * block_size,
+// channels / (block_size * block_size)}.
+void DepthToSpace::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const int block_size = params().block_size;
+  // block_size is used as a divisor below; reject zero/negative values before dividing.
+  LUCI_INTERPRETER_CHECK(block_size > 0);
+
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  const int32_t input_channels = input()->shape().dim(3);
+  const int32_t output_height = input_height * block_size;
+  const int32_t output_width = input_width * block_size;
+  const int32_t output_channels = input_channels / block_size / block_size;
+
+  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+  // Fails when input_channels is not divisible by block_size^2 (the integer division above
+  // would otherwise silently drop channels).
+  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = output_channels;
+
+  output()->resize(output_shape);
+}
+
+// Runs the platform (PAL) DepthToSpace implementation for the input element type.
+// Throws std::runtime_error for any element type other than FLOAT32 and U8.
+void DepthToSpace::execute() const
+{
+  tflite::DepthToSpaceParams op_params;
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported Type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
new file mode 100644
index 0000000..63ce376
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+// NOTE(review): the header name on this line was lost when the patch was extracted;
+// <vector> is assumed here - confirm against the upstream file.
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// DepthToSpace kernel: one input tensor, one output tensor, parameterised by
+// DepthToSpaceParams (block size).
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+  DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+  // The only input tensor of the kernel.
+  const Tensor *input() const { return _inputs[0]; }
+  // The only output tensor of the kernel.
+  Tensor *output() const { return _outputs[0]; }
+
+  // Checks the operands and resizes the output tensor.
+  void configure() override;
+  // Computes the output from the input; expects configure() to have run first.
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
new file mode 100644
index 0000000..88e6e07
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed fixture: every TYPED_TEST below runs once per element type listed in DataTypes.
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+// The element types accepted by DepthToSpace::configure() (FLOAT32 and U8).
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
+
+// A {1, 1, 2, 4} input with block_size 2 must become a {1, 2, 4, 1} output with the
+// elements rearranged as listed in output_data.
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+  std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
+  std::vector<int32_t> output_shape{1, 2, 4, 1};
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  // The output shape is only known after configure(), so allocate afterwards.
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+              ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+// A 3-dimensional input must be rejected by configure().
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Input and output element types that differ must be rejected by configure().
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Four input channels are not divisible by block_size^2 = 9, so configure() must throw.
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 3;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 0000000..c554c30
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+#include <algorithm>
+#include <iterator>
+#include <stdexcept>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the kernel with inputs {input, filter, bias} and outputs {output, scratchpad}.
+// `bias` may be null: configure() accepts `bias() == nullptr` in every type check.
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+                                 Tensor *output, Tensor *scratchpad,
+                                 const DepthwiseConv2DParams &params)
+  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+// Validates operand types and shapes, computes the padding used by the eval* methods,
+// resizes the output tensor and lets the PAL layer set up the scratchpad tensor.
+// Throws std::runtime_error for unsupported type combinations; other violations are
+// reported through LUCI_INTERPRETER_CHECK.
+void DepthwiseConv2D::configure()
+{
+  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (1) | float float  float float  |
+  // (2) | float int8   float float  | hybrid
+  // (3) | uint8 uint8  int32 uint8  | quantized
+  // (4) | int8  int8   int32 int8   | quantized per channel
+  // (5) | int16 int8   int64 int16  | quantized per channel 16x8
+  //
+  // We only support (1), (3) and (4) for now, and additionally the following:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (6) | int16 int16  int64 int16  |
+  //
+  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    // Per-channel quantization: one scale per output channel and all zero points equal to 0.
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(static_cast<size_t>(filter()->shape().dim(3)) ==
+                           filter()->scales().size());
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  // Filter format: [1, H, W, O].
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t channels_out = filter_shape.dim(3);
+
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == channels_out));
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+                      _params.dilation_height_factor);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+                      _params.dilation_width_factor);
+
+  // Stored as members because every eval* method needs them at execution time.
+  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+                                   input_height, filter_height, output_height);
+  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+                                  filter_width, output_width);
+
+  output()->resize({batches, output_height, output_width, channels_out});
+
+  // Only the dilation factors are filled in for the scratchpad setup call.
+  tflite::DepthwiseParams params{};
+
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+// Throws std::runtime_error when no routine applies.
+void DepthwiseConv2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      if (filter()->element_type() == DataType::FLOAT32)
+      {
+        evalFloat();
+        break;
+      }
+      throw std::runtime_error("Unsupported type.");
+    case DataType::U8:
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(3)));
+        evalQuantizedPerChannel();
+      }
+      else
+      {
+        // A U8 filter without any scale cannot be evaluated; fail loudly instead of
+        // returning with the output tensor left unwritten.
+        throw std::runtime_error("Unsupported type.");
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: forwards to the TFLite reference depthwise convolution.
+void DepthwiseConv2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+    getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// U8 path with one filter scale/zero point per output channel (hand-written loops).
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+  const int32_t depth_multiplier = _params.depth_multiplier;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  const auto effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+    quantizeMultipliers(effective_output_scales);
+  // NOTE(review): BroadcastableWrapper presumably allows indexing by channel even when only
+  // one multiplier is present - confirm in kernels/Utils.h.
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+        {
+          for (int m = 0; m < depth_multiplier; ++m)
+          {
+            const int output_channel = m + in_channel * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - _padding_width;
+            const int in_y_origin = (out_y * stride_height) - _padding_height;
+            int32_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+            {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+              {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y = in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+                if (is_point_inside_image)
+                {
+                  int32_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+                  acc += (filter_val - filter()->zero_points()[output_channel]) *
+                         (input_val - input()->zero_point());
+                }
+              }
+            }
+            if (bias_data)
+            {
+              acc += bias_data[output_channel];
+            }
+            int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+            int output_shift = quant_multipliers[output_channel].shift;
+            int32_t scaled_acc =
+              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+            scaled_acc += output()->zero_point();
+            scaled_acc = std::max(scaled_acc, activation_min);
+            scaled_acc = std::min(scaled_acc, activation_max);
+            output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+              static_cast<uint8_t>(scaled_acc);
+          }
+        }
+      }
+    }
+  }
+}
+
+// U8 path with a single (per-tensor) filter scale: forwards to the TFLite reference kernel.
+void DepthwiseConv2D::evalQuantized() const
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto filter_scale = static_cast<double>(filter()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const double real_multiplier = input_scale * filter_scale / output_scale;
+  int32_t output_multiplier{};
+  int output_shift{};
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point();    // Note the '-'.
+  params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+    params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+    getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+// S8 per-channel path: forwards to the PAL implementation, passing the per-channel
+// multipliers/shifts and the scratchpad buffer (null when the scratchpad is not allocatable).
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+
+  // NOTE(review): padding_type is fixed to kSame regardless of _params.padding; the explicit
+  // padding values below are presumably what the callee uses - confirm.
+  params.padding_type = tflite::PaddingType::kSame;
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = 1; // unused in tflite code
+  params.output_shift = 0;      // unused in tflite code
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const auto effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  // Split the per-channel pairs into the two parallel arrays the PAL call takes.
+  std::vector<int32_t> shifts;
+  shifts.reserve(quant_multipliers.size());
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](const ChannelQuantMultipliers &cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  multipliers.reserve(quant_multipliers.size());
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](const ChannelQuantMultipliers &cm) { return cm.multiplier; });
+
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+// S16 path (hand-written loops). Products are accumulated in 64 bits and the bias is
+// read as int64_t; no input/filter/output zero points are applied here.
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+  const int32_t depth_multiplier = _params.depth_multiplier;
+
+  const auto effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+    quantizeMultipliers(effective_output_scales);
+
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          for (int32_t m = 0; m < depth_multiplier; ++m)
+          {
+            const int32_t out_c = m + in_c * depth_multiplier;
+            const int32_t in_y_origin = out_y * stride_height - _padding_height;
+            const int32_t in_x_origin = out_x * stride_width - _padding_width;
+            int64_t acc = 0;
+            for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+            {
+              for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+              {
+                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+                // Taps outside the image contribute nothing (zero padding).
+                if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+                {
+                  const int16_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+            if (bias_data != nullptr)
+            {
+              acc += bias_data[out_c];
+            }
+
+            int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+            int output_shift = quant_multipliers[out_c].shift;
+            int32_t scaled_acc =
+              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+            scaled_acc = std::max(scaled_acc, activation_min);
+            scaled_acc = std::min(scaled_acc, activation_max);
+
+            // Narrowing is explicit; the value was clamped to the activation range above.
+            output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] =
+              static_cast<int16_t>(scaled_acc);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 0000000..3d1faf6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class DepthwiseConv2D : public KernelWithParams +{ +public: + DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const DepthwiseConv2DParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *bias() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp new file mode 100644 index 0000000..6b4673f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/DepthwiseConv2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DepthwiseConv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(DepthwiseConv2DTest, Float) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); +} + +TEST_F(DepthwiseConv2DTest, Uint8) +{ + std::vector input_data{ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + + std::pair input_quant_param = quantizationParams(-63.5, 64); + std::pair output_quant_param = quantizationParams(-127, 128); + + Tensor input_tensor = + makeInputTensor({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor( + {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + std::vector ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); +} + +TEST_F(DepthwiseConv2DTest, SInt16) +{ + Shape input_shape{1, 4, 2, 2}; + Shape 
filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector ref_output_shape{1, 2, 1, 4}; + + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + std::vector ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + Tensor input_tensor = + makeInputTensor(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S64, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector ref_output_shape{1, 2, 1, output_channels}; + + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 
8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + std::vector ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + float input_scale = 0.25; + std::vector filter_scales{0.2f, 1.f, 0.5f, 0.1f}; + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_scales[i] * input_scale); + std::vector zerop(4, 0); + Tensor input_tensor = + makeInputTensor(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, zerop, 3, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector ref_output_shape{1, 2, 1, output_channels}; + + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; 
+ std::vector bias_data{1, 2, 3, 4}; + std::vector ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair input_quant_param = quantizationParams(0, 16); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + filter_quant_params.push_back(quantizationParams(-9, 13)); + filter_quant_params.push_back(quantizationParams(-14, 10)); + filter_quant_params.push_back(quantizationParams(-11, 15)); + filter_quant_params.push_back(quantizationParams(-16, 12)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector ref_output_shape{1, 2, 1, output_channels}; + + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + std::vector ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair input_quant_param = quantizationParams(-128, 127); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + filter_quant_params.push_back(std::pair(0.5, 0)); + filter_quant_params.push_back(std::pair(0.25, 0)); + filter_quant_params.push_back(std::pair(1, 0)); + filter_quant_params.push_back(std::pair(0.125, 0)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, 
output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, 
&scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{4, 2, 2}; + Shape filter_shape{2, 2, 4}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{2, 1, 2, 4}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 4, 2}; + Shape bias_shape{4}; + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + 
std::vector filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp new file mode 100644 index 0000000..96399e5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Dequantize.h" +#include "kernels/Utils.h" +#include "PALDequantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Dequantize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 || + input()->element_type() == loco::DataType::U8 || + input()->element_type() == loco::DataType::S16); + + LUCI_INTERPRETER_CHECK(input()->scales().size() == 1); + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + + output()->resize(input()->shape()); +} + +void Dequantize::execute() const +{ + tflite::DequantizationParams op_params; + op_params.zero_point = input()->zero_point(); + op_params.scale = input()->scale(); + + switch (input()->element_type()) + { + case loco::DataType::U8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + } + case loco::DataType::S8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h new file mode 100644 index 0000000..5565df0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Dequantize : public Kernel +{ +public: + Dequantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp new file mode 100644 index 0000000..0cab633 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Dequantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DequantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(DequantizeTest, Uint8) +{ + std::vector input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + std::vector ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint8) +{ + std::vector input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + std::vector ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), 
input_data.size() * sizeof(int8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint16) +{ + std::vector input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + std::vector ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, InvalidInputType_NEG) +{ + std::vector input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor = + makeInputTensor({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidOutputType_NEG) +{ + std::vector input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = 
makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp new file mode 100644 index 0000000..dd15322 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" + +#include "kernels/Utils.h" + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms) + : KernelWithParams({input1, input2}, {output}, params) +{ +} + +void Div::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Div::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Div::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +template void Div::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, 
getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +void Div::evalQuantized() const +{ + const auto input1_scale = static_cast(input1()->scale()); + const auto input2_scale = static_cast(input2()->scale()); + const auto output_scale = static_cast(output()->scale()); + + const double real_output_multiplier = input1_scale / (input2_scale * output_scale); + + int32_t output_multiplier{}; + int output_shift{}; + + quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input2_offset = -input2()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.h b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h new file mode 100644 index 0000000..c1bf3e1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DIV_H +#define LUCI_INTERPRETER_KERNELS_DIV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Div : public KernelWithParams +{ +public: + Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp new file mode 100644 index 0000000..85cd8b9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +float GetTolerance(float min, float max) +{ + const float kQuantizedStep = (max - min) / 255.0f; + const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST_F(DivTest, Float) +{ + Shape base_shape = {2, 3, 1, 1}; + + std::vector output_shape = {2, 3, 1, 1}; + + std::vector input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; + std::vector test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; + + Tensor input1_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(base_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST_F(DivTest, FloatBroadcast) +{ + Shape input1_shape = {1, 3}; + Shape input2_shape = {3, 1}; + + std::vector input1_data{-0.3f, 2.3f, 0.9f}; + std::vector input2_data{0.2f, 1.6f, 0.5f}; + std::vector test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; + + Tensor input1_tensor = + makeInputTensor(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + 
makeInputTensor(input2_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(DivTest, Uint8) +{ + Shape base_shape = {1, 2, 2, 1}; + + std::vector output_shape = {1, 2, 2, 1}; + + std::vector input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; + std::vector input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; + std::vector test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + + std::pair quant_param = quantizationParams(-1.f, 1.f); + + Tensor input1_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); + + Tensor output_tensor = + makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(test_outputs, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + std::vector> test_outputs = {{5, 6, 2, 0, 10, 3, // + 10, 0, 4, 5, 20, 0, // + 0, 0, 0, 2, 
0, 0, // + 2, 0, 1, 10, 5, 0, // + 2, 3, 1, 0, 5, 1, // + 18, 20, 7, 0, 37, 10}, + {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, + {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, + 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, + {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; + std::vector input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; + std::vector input2_data{4, 5, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(DivTest, SInt64) +{ + checkInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, SInt32) +{ + checkInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DivTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, 
&output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(DivTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp new file mode 100644 index 0000000..697d63b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Elu.h" +#include "kernels/Utils.h" + +#include "PALElu.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Elu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Elu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h new file mode 100644 index 0000000..c844ab5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_ELU_H +#define LUCI_INTERPRETER_KERNELS_ELU_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Elu : public Kernel +{ +public: + Elu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp new file mode 100644 index 0000000..814499c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Elu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Elu kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + (void)output_shape; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data)); +} + +TEST(EluTest, SimpleElu) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }, + /*output_data=*/ + { + 0.0, -0.997521, 2.0, -0.981684, // + 3.0, -0.864665, 10.0, -0.0951626, // + }); +} + +TEST(EluTest, InOutTypeMismatch_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Elu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp new file mode 100644 index 0000000..a57e127 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Equal.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Equal::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Equal::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Equal::evalFloat() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), 
x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template void Equal::evalInteger() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Equal::evalQuantized() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h new file mode 100644 index 0000000..c9be32c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Equal : public Kernel +{ +public: + Equal(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp new file mode 100644 index 0000000..5870e54 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp @@ -0,0 +1,306 
@@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Equal.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class EqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(EqualTest, FloatSimple) +{ + std::vector x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector ref_output_data{ + false, true, false, // Row 1 + false, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(EqualTest, FloatBroardcast) +{ + std::vector x_data{ + 0.5, 0.7, 
0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector ref_output_data{ + false, true, false, // Row 1 + false, false, false, // Row 2 + false, false, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = std::numeric_limits::max(); + std::vector x_data{min_value, 2, max_value}; + + std::vector y_data{min_value, -2, max_value}; + + std::vector ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = 
std::numeric_limits::max(); + std::vector x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + false, true, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(EqualTest, Int32) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(EqualTest, Int64) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(EqualTest, Uint8Quantized) +{ + std::vector x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector ref_output_data{ + false, true, false, false, // Row 1 + false, true, true, false, // Row 2 + }; + + std::pair x_quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair y_quant_param = quantizationParams(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Uint8QuantizedBroadcast) +{ + std::vector x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector ref_output_data{ + false, false, false, false, // Row 1 + false, false, true, false, // Row 2 + false, false, false, false, // Row 3 + true, true, true, true, // Row 4 + }; + + std::pair quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); 
+ + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp new file mode 100644 index 0000000..e7c560a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Exp.h" + +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Exp::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Exp::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Exp::evalFloat() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + tflite::reference_ops::Exp(getTensorData(input()), size, getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h new file mode 100644 index 0000000..4291773 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXP_H +#define LUCI_INTERPRETER_KERNELS_EXP_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Exp : public Kernel +{ +public: + Exp(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXP_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp new file mode 100644 index 0000000..a159d9d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Exp.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(ExpTest, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + Shape input_shape{1, 1, 7}; + std::vector input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Exp kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_shape{1, 1, 7}; + std::vector ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f), + std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f), + std::exp(-0.01f)}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp new file mode 100644 index 0000000..ba35c99 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output) + : Kernel({input, axis}, {output}) +{ +} + +void ExpandDims::configure() +{ + int32_t axis_value; + + switch (axis()->element_type()) + { + case loco::DataType::S32: + axis_value = *getTensorData(axis()); + break; + case loco::DataType::S64: + axis_value = static_cast(*getTensorData(axis())); + break; + default: + throw std::runtime_error("Unsupported type."); + } + + const auto input_shape = input()->shape(); + + if (axis_value < 0) + { + axis_value += input_shape.num_dims() + 1; + } + + LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0); + + Shape output_shape(input_shape.num_dims() + 1); + for (int32_t i = 0; i < output_shape.num_dims(); ++i) + { + if (i < axis_value) + { + output_shape.dim(i) = input_shape.dim(i); + } + else if (i == axis_value) + { + output_shape.dim(i) = 1; + } + else + { + LUCI_INTERPRETER_CHECK(i >= 1); + output_shape.dim(i) = input_shape.dim(i - 1); + } + } + + output()->resize(output_shape); +} + +void ExpandDims::execute() const +{ + // Just copy input to output + const auto *input_data = input()->data(); + auto *output_data = output()->data(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h new file mode 100644 index 0000000..e510b11 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h @@ -0,0 +1,44 @@ +/* + * 
Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H +#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ExpandDims : public Kernel +{ +public: + ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axis() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp new file mode 100644 index 0000000..df9eacc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ExpandDimsTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(ExpandDimsTest, PositiveAxis) +{ + std::vector input_data{-1, 1, -2, 2}; + std::initializer_list input_shape = {2, 2}; + + std::initializer_list axis_value = {0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2})); +} + +TEST_F(ExpandDimsTest, NegAxis) +{ + std::vector input_data{-1, 1, -2, 2}; + std::initializer_list input_shape = {2, 2}; + + std::initializer_list axis_value = {-1}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims 
kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1})); +} + +TEST_F(ExpandDimsTest, InvalidAxisType_NEG) +{ + std::vector input_data{-1, 1, -2, 2}; + std::initializer_list input_shape = {2, 2}; + + std::initializer_list axis_value = {1.0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ExpandDimsTest, InvalidAxisValue_NEG) +{ + std::vector input_data{-1, 1, -2, 2}; + std::initializer_list input_shape = {2, 2}; + + std::initializer_list axis_value = {3}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp new file mode 100644 index 0000000..e09d633 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/Utils.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output) + : Kernel({dims, value}, {output}) +{ +} + +template void Fill::configureShape() +{ + const auto dims_data = getTensorData(dims()); + Shape output_shape(dims()->shape().dim(0)); + + for (int i = 0; i < output_shape.num_dims(); ++i) + { + T data = dims_data[i]; + if (data < 0) + throw std::runtime_error("Fill dimensions must be >= 0"); + + output_shape.dim(i) = data; + } + + output()->resize(output_shape); +} + +void Fill::configure() +{ + const auto dims_shape = dims()->shape(); + const auto value_shape = value()->shape(); + + // Make sure the 1st input tensor is 1-D + LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1); + + // Make sure the 1st input tensor is int32 or int64 + LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or + dims()->element_type() == DataType::S64); + + // Make sure the 2nd input tensor is a scalar + LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0) + + // Check zero point and scale for S16 and S8 + if (value()->element_type() == loco::DataType::S16 or + value()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale()); + LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point()); + + if (value()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(value()->zero_point() == 0); + } + 
// Resize output + switch (dims()->element_type()) + { + case DataType::S32: + configureShape(); + break; + case DataType::S64: + configureShape(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Fill::execute() const +{ + switch (output()->element_type()) + { + case DataType::S8: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::S16: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::S32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::S64: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::FLOAT32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData(value()), + getTensorShape(output()), getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h new file mode 100644 index 0000000..184f0cb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FILL_H +#define LUCI_INTERPRETER_KERNELS_FILL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Fill : public Kernel +{ +public: + Fill(const Tensor *dims, const Tensor *value, Tensor *output); + + const Tensor *dims() const { return _inputs[0]; } + const Tensor *value() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void configureShape(); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FILL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp new file mode 100644 index 0000000..cf56df5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FillTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +template void runFillIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector dims_data = {2, 3}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor
(/*scalar*/ {}, value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + + std::vector ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +template void runFillQuantIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector dims_data = {2, 3}; + std::vector value_data = {5}; + + int32_t zero_point = 0; + + if (DT == loco::DataType::S8) + zero_point = 1; + + Tensor dims = makeInputTensor(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor
(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point, + value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + + std::vector ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, FillInt) +{ + // Run for int32_t input + runFillIntKernel(_memory_manager.get()); + // Run for int64_t input + runFillIntKernel(_memory_manager.get()); + // Run for int8_t input + runFillQuantIntKernel(_memory_manager.get()); + // Run for int16_t input + runFillQuantIntKernel(_memory_manager.get()); + + SUCCEED(); +} + +TEST_F(FillTest, FillFloat) +{ + Shape dims_shape{3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{5, 5, 5, 5, 5, 5, 5, 5}; + + std::vector ref_output_shape{2, 2, 2}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, Invalid_Input_Shape_NEG) +{ + Shape dims_shape{1, 3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor(/*scalar*/ {}, 
value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FillTest, Invalid_Value_Shape_NEG) +{ + Shape dims_shape{3}; + + std::vector dims_data = {2, 2, 2}; + std::vector value_data = {5}; + + Tensor dims = makeInputTensor(dims_shape, dims_data, _memory_manager.get()); + Tensor value = makeInputTensor({1}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp new file mode 100644 index 0000000..e3c4246 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Floor.h" +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Floor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Floor::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Floor::evalFloat() const +{ + tflite::reference_ops::Floor(getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h new file mode 100644 index 0000000..ca3ad59 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Floor : public Kernel +{ +public: + Floor(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp new file mode 100644 index 0000000..30076fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Floor.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(FloorTest, SimpleFloat) +{ + std::initializer_list input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0.2, 8.6, 2.4, 4.3, // Row 1 + 3, 7.1, 10.5, -0.9, // Row 2 + }; + + std::initializer_list ref_output_shape{1, 2, 4, 1}; + std::vector ref_output_data{ + 0, 8, 2, 4, // Row 1 + 3, 7, 10, -1, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Floor kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Floor kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp new file mode 100644 index 0000000..a7a10a3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+// NOTE(review): the two angle-bracket include targets and all <...> template arguments in
+// this file were stripped by text extraction and are reconstructed here; confirm upstream.
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Parameter names follow the header declaration (x = numerator, y = denominator).
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Requires both operands to share the output element type and sizes the output to the
+// broadcast of the two operand shapes.
+void FloorDiv::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches on the element type of x; only FLOAT32 is implemented.
+void FloorDiv::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Computes floor(x / y) element-wise, broadcasting when the operand shapes differ.
+// Fails the LUCI_INTERPRETER_CHECK if any denominator element is zero.
+void FloorDiv::evalFloat() const
+{
+  // The division is carried out in double precision before flooring.
+  auto FloorDivFunc = [](float x, float y) -> float {
+    return std::floor(static_cast<double>(x) / static_cast<double>(y));
+  };
+
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+
+  // Check the denominator (element count hoisted out of the loop condition)
+  const int y_size = getTensorShape(y()).FlatSize();
+  for (int i = 0; i < y_size; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+  if (x()->shape() != y()->shape())
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+  else
+  {
+    tflite::reference_ops::BinaryFunction<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 0000000..e9c47d8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for the FloorDiv operation: two input tensors (x, y), one output tensor.
+class FloorDiv : public Kernel
+{
+public:
+  FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+  // x is input slot 0, y is input slot 1; output is output slot 0.
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  // Kernel interface: configure() prepares/validates, execute() runs the computation.
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Float implementation used by execute().
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 0000000..3e1b5f1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// NOTE(review): every <...> template argument in this file was stripped by text extraction
+// and has been reconstructed from the surrounding code; confirm against the upstream file.
+
+// Fixture that provides a fresh memory manager for each test case.
+class FloorDivTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Same-shape operands: exercises the non-broadcast path, including negative / negative.
+TEST_F(FloorDivTest, FloatSimple)
+{
+  Shape x_shape{2, 3};
+  std::vector<float> x_data{
+    0.5, 2.4,  3.1,  // Row 1
+    1.9, -1.9, -2.8, // Row 2
+  };
+
+  Shape y_shape = x_shape;
+  std::vector<float> y_data{
+    2.0, 0.5,  3.0,  // Row 1
+    1.0, -1.0, -2.0, // Row 2
+  };
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  std::vector<float> ref_output_data{
+    0, 4, 1, // Row 1
+    1, 1, 1, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// A 1x3 numerator broadcast against a 3x3 denominator; mixed signs check rounding toward -inf.
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+  Shape x_shape{1, 3};
+  std::vector<float> x_data{
+    0.5, 2.4, -3.1, // Row 1
+  };
+
+  Shape y_shape{3, 3};
+  std::vector<float> y_data{
+    1.0, 1.0,  1.0,  // Row 1
+    2.0, -0.5, -2.0, // Row 2
+    0.3, 0.7,  0.9,  // Row 3
+  };
+
+  std::vector<int32_t> ref_output_shape{3, 3};
+  std::vector<float> ref_output_data{
+    0, 2,  -4, // Row 1
+    0, -5, 1,  // Row 2
+    1, 3,  -4, // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// An all-zero denominator passes configure() but must make execute() throw.
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+  Shape shape{3};
+  std::vector<float> x_data{1, 0, -1};
+  std::vector<float> y_data{0, 0, 0};
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+// Float inputs with a U8 output must be rejected by configure().
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs of differing element types must be rejected by configure().
+// NOTE(review): the stripped source no longer shows which operand is non-float; with a
+// FLOAT32 output at least one must differ. FLOAT32 / U8 is assumed here -- confirm upstream.
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 0000000..bd2bb2f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+// NOTE(review): the angle-bracket include target and all <...> template arguments in this
+// file were stripped by text extraction and are reconstructed here; confirm upstream.
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// `bias` may be null; every use below guards against that.
+FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
+                               Tensor *output, const FullyConnectedParams &params)
+  : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+{
+}
+
+// Validates the element-type combination (keyed on the weights type), checks the weight and
+// bias shapes, and resizes the output to either {batch, units} or the input shape with its
+// last dimension replaced by the unit count (keep_num_dims).
+void FullyConnected::configure()
+{
+  if (weights()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else if (weights()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (weights()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  const Shape &input_shape = input()->shape();
+  const Shape &weights_shape = weights()->shape();
+
+  // Weights are laid out as {num_units, input_depth}; bias (if any) has one value per unit.
+  LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(bias() == nullptr ||
+                         bias()->shape().num_elements() == weights_shape.dim(0));
+
+  // Guard the modulo/division below against a zero input depth.
+  LUCI_INTERPRETER_CHECK(weights_shape.dim(1) > 0);
+  LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
+  const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
+  const int32_t num_units = weights_shape.dim(0);
+
+  if (params().keep_num_dims == false)
+  {
+    output()->resize({batch_size, num_units});
+  }
+  else
+  {
+    luci_interpreter::Shape output_shape(input_shape.num_dims());
+    for (int i = 0; i < input_shape.num_dims(); ++i)
+      output_shape.dim(i) = input_shape.dim(i);
+    output_shape.dim(input_shape.num_dims() - 1) = num_units;
+    output()->resize(output_shape);
+  }
+}
+
+// Dispatches on the input element type.
+void FullyConnected::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S8:
+      evalQuantizedS8();
+      break;
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: clamps to the activation range and calls the TFLite reference kernel.
+void FullyConnected::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::FullyConnectedParams params{};
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+  params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  tflite::reference_ops::FullyConnected(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+    getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// U8 path: derives the requantization multiplier/shift from the tensor scales and calls the
+// TFLite reference kernel.
+void FullyConnected::evalQuantized() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  // Input and weights offsets are the negated zero points; the output offset is not negated.
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  tflite::reference_ops::FullyConnected(
+    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+    getTensorShape(weights()), getTensorData<uint8_t>(weights()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+// S8 path: same parameter setup as evalQuantized(), but routed through the platform
+// abstraction layer (luci_interpreter_pal) instead of the TFLite reference kernel.
+void FullyConnected::evalQuantizedS8() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  luci_interpreter_pal::FullyConnected<int8_t>(
+    op_params, getTensorShape(input()), getTensorData<int8_t>(input()),
+    getTensorShape(weights()), getTensorData<int8_t>(weights()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
new file mode 100644
index 0000000..2a7c068
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for the FullyConnected operation: inputs are (input, weights, bias),
+// with a single output tensor.
+// NOTE(review): the <FullyConnectedParams> base-class argument was stripped by text
+// extraction and is reconstructed here; confirm against the upstream header.
+class FullyConnected : public KernelWithParams<FullyConnectedParams>
+{
+public:
+  FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
+                 const FullyConnectedParams &params);
+
+  // Input slots 0..2 and output slot 0 of the Kernel base class.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *weights() const { return _inputs[1]; }
+  const Tensor *bias() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  // Kernel interface: configure() prepares/validates, execute() runs the computation.
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One implementation per supported element type (FLOAT32, U8, S8).
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalQuantizedS8() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 0000000..4474cc4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// NOTE(review): every <...> template argument in this file (template headers, container
+// element types, tensor data types, the typed-test type list) was stripped by text
+// extraction and has been reconstructed from context; confirm against the upstream file.
+
+// Runs a RELU-activated FullyConnected kernel on float tensors and compares shape and data
+// with the expectation. Specialized below for the quantized element types.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+// S8 variant: quantizes input/weights with one parameter set, uses scale^2 with zero point 0
+// for the S32 bias, and compares the dequantized output within a quantization tolerance.
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+// U8 variant: same flow as the S8 specialization with unsigned 8-bit tensors.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+// Two batches of six inputs against three units; RELU clamps the negative results to 0.
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+                   {
+                     -3, -5, 5, 4, 9, -2,  // batch = 0
+                     -3, -2, -4, 9, -8, 1, // batch = 1
+                   },
+                   {
+                     -3, -7, 4, -4, -6, 4, // unit = 0
+                     3, 5, 2, 3, -3, -8,   // unit = 1
+                     -3, 7, 4, 9, 0, -5,   // unit = 2
+                   },
+                   {-1, -5, -8},
+                   {
+                     0, 0, 32,   // batch = 0
+                     22, 11, 47, // batch = 1
+                   });
+}
+
+// Float weights with an S32 bias must be rejected by configure().
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<int32_t> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Rank-3 weights must be rejected by configure(), which requires exactly two dimensions.
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{1, 3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Six units with a three-element bias: the bias size must equal weights dim 0.
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{6, 3};
+  std::vector<float> weights_data{
+    -3, -7, 4,  // unit = 0
+    -4, -6, 4,  // unit = 1
+    3,  5,  2,  // unit = 2
+    3,  -3, -8, // unit = 3
+    -3, 7,  4,  // unit = 4
+    9,  0,  -5, // unit = 5
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 0000000..f125666
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+// NOTE(review): the two angle-bracket include targets and all <...> template arguments in
+// this file were stripped by text extraction and are reconstructed here; confirm upstream.
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+               const GatherParams &gparams)
+  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+// Validates element types, normalizes negative axis / batch_dims for the range checks, and
+// resizes the output to params[:axis] + indices[batch_dims:] + params[axis + 1:].
+void Gather::configure()
+{
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // refer tensorflow/lite/kernels/gather.cc
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // Negative batch_dims is added with rank of positions.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  // The leading batch dimensions of params and indices must agree.
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  output()->resize(output_shape);
+}
+
+// Dispatches on the params element type; only FLOAT32 is implemented.
+void Gather::execute() const
+{
+  switch (params()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: forwards the raw (un-normalized) axis / batch_dims to the PAL Gather and picks
+// the index element type (S32 or S64) at run time.
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  const auto params_data = getTensorData<float>(params());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  if (indices()->element_type() == DataType::S32)
+  {
+    const auto indices_data = getTensorData<int32_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+  else
+  {
+    const auto indices_data = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 0000000..cc02d64
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for the Gather operation: inputs are (params, indices), one output.
+// NOTE(review): the <GatherParams> base-class argument was stripped by text extraction and
+// is reconstructed here; confirm against the upstream header.
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+  Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+  // Input slots 0..1 and output slot 0 of the Kernel base class.
+  const Tensor *params() const { return _inputs[0]; }
+  const Tensor *indices() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Float implementation used by execute().
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 0000000..4b3dda7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ 
* Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture that gives every test a fresh memory manager for tensor storage.
+class GatherTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Gathers four elements along axis 1 of a {1, 6} float tensor; the output shape is {1, 4}.
+TEST_F(GatherTest, Simple)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+// Same as Simple but with batch_dims = 1: each of the 3 rows uses its own pair of indices.
+TEST_F(GatherTest, Simple_Batch)
+{
+  Shape params_shape = {3, 5};
+  Shape indices_shape = {3, 2};
+  std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+  std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+  std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+  Tensor indices_tensor =
+    makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+// Negative case: configure() is expected to throw for integer params data.
+// NOTE(review): the params element type was lost in extraction; S32 is inferred from the
+// integer literal {1} - confirm against upstream.
+TEST_F(GatherTest, Simple_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: axis 100 is far outside the rank-1 params tensor, so configure() must throw.
+TEST_F(GatherTest, Axis_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 100;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: batch_dims (1) larger than axis (0) must be rejected by configure().
+TEST_F(GatherTest, Batch_NEG)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 0;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 0000000..5ccae3c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers x and y as the kernel inputs and output as its single output.
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+// Checks that both inputs share one element type and that the output is BOOL, prepares the
+// rescale parameters for U8 inputs, and resizes the output to the broadcast shape of x and y.
+void Greater::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Derive a multiplier/shift pair from each input scale; evalQuantized() forwards them.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches on the element type of x; throws std::runtime_error for unsupported types.
+void Greater::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// FLOAT32 path: uses the broadcasting reference op only when the input shapes differ.
+void Greater::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+                                                  getTensorShape(y()), y_data,
+                                                  getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                   y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path shared by S32 and S64; T is the C++ type of the input elements.
+template <typename T> void Greater::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
+// U8 path: passes the negated zero points and the multiplier/shift pairs computed in
+// configure() to the "WithScaling" reference ops.
+void Greater::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
new file mode 100644
index 0000000..065f76d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Comparison kernel with two inputs (x, y) and one output tensor.
+class Greater : public Kernel
+{
+public:
+  Greater(const Tensor *x, const Tensor *y, Tensor *output);
+
+  // Accessors for the operands in the order they were passed to the constructor.
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One evaluation routine per supported input type family.
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Quantization rescale parameters for x and y; they stay 0 unless configure() fills them.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 0000000..a480801
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture that gives every test a fresh memory manager for tensor storage.
+class GreaterTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Element-wise x > y on two float tensors of identical shape {2, 3}.
+TEST_F(GreaterTest, FloatSimple)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1,  0,   1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true,  // Row 1
+    true,  false, false, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// A {1, 3} float row is broadcast against every row of a {3, 3} tensor.
+TEST_F(GreaterTest, FloatBroardcast)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+    -1,  0,   1,   // Row 3
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true,  // Row 1
+    true,  false, false, // Row 2
+    false, false, true,  // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+// Same-shape integer comparison that includes the minimum and maximum values of the type.
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Integer comparison where a {3} vector is broadcast against the rows of a {4, 3} tensor.
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -4, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  false, // Row 1
+    true,  true,  true,  // Row 2
+    true,  false, false, // Row 3
+    false, false, true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Runs both integer helpers for 32-bit signed inputs.
+TEST_F(GreaterTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Runs both integer helpers for 64-bit signed inputs.
+TEST_F(GreaterTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// U8 inputs that share one set of quantization parameters.
+TEST_F(GreaterTest, Uint8Quantized)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7,  0.9, // Row 1
+    1,   0,   0.05, -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6,  0.6, 0.5, // Row 1
+    -1,  0.05, 0,   1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true,  // Row 1
+    true,  false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 inputs quantized over different ranges, so the kernel has to rescale before comparing.
+TEST_F(GreaterTest, Uint8QuantizedRescale)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7,  0.9, // Row 1
+    1,   0,   0.05, -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6,  0.6, 0.5, // Row 1
+    -1,  0.05, 0,   1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true,  // Row 1
+    true,  false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
+
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison where a {1, 1, 4, 1} tensor is broadcast against {1, 3, 4, 1}.
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
+{
+  std::vector<float> x_data{
+    0.4,  -0.8, 0.7,  0.3, // Row 1
+    -0.5, 0.1,  0,    0.5, // Row 2
+    1,    0,    0.05, -1,  // Row 3
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true,  false, // Row 1
+    true, true,  false, false, // Row 2
+    true, false, true,  false, // Row 3
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// Negative case: inputs with different element types must be rejected by configure().
+// NOTE(review): the type of y was lost in extraction; U8 is inferred - confirm against upstream.
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: a non-BOOL output tensor must be rejected by configure().
+TEST_F(GreaterTest, Input_Output_Type_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: float shapes {2} and {3} cannot be broadcast together.
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: S32 shapes {2} and {3} cannot be broadcast together.
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: S64 shapes {2} and {3} cannot be broadcast together.
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 0000000..27e42c9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers x and y as the kernel inputs and output as its single output.
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Checks that both inputs share one element type and that the output is BOOL, prepares the
+// rescale parameters for U8 inputs, and resizes the output to the broadcast shape of x and y.
+void GreaterEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Derive a multiplier/shift pair from each input scale; evalQuantized() forwards them.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches on the element type of x; throws std::runtime_error for unsupported types.
+void GreaterEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// FLOAT32 path: uses the broadcasting reference op only when the input shapes differ.
+void GreaterEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+                                                       getTensorShape(y()), y_data,
+                                                       getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                        y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path shared by S32 and S64; T is the C++ type of the input elements.
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                 getTensorShape(y()), y_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+// U8 path: passes the negated zero points and the multiplier/shift pairs computed in
+// configure() to the "WithScaling" reference ops.
+void GreaterEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
new file mode 100644
index 0000000..e333c30
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Comparison kernel with two inputs (x, y) and one output tensor.
+class GreaterEqual : public Kernel
+{
+public:
+  GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+  // Accessors for the operands in the order they were passed to the constructor.
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One evaluation routine per supported input type family.
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Quantization rescale parameters for x and y; they stay 0 unless configure() fills them.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 0000000..35bf88e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/GreaterEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GreaterEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(GreaterEqualTest, FloatSimple) +{ + std::vector x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(GreaterEqualTest, FloatBroardcast) +{ + std::vector x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector ref_output_data{ + false, true, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + 
Tensor x_tensor = makeInputTensor({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} +template +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = std::numeric_limits::max(); + std::vector x_data{min_value, 2, max_value}; + + std::vector y_data{min_value + 1, -2, max_value}; + + std::vector ref_output_data{false, true, true}; + + Tensor x_tensor = makeInputTensor({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = std::numeric_limits::max(); + std::vector x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value - 1, // Row 4 + }; + + std::vector y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector ref_output_data{ + false, true, false, // Row 1 + 
true, true, true, // Row 2 + true, false, false, // Row 3 + false, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterEqualTest, Int32) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterEqualTest, Int64) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(GreaterEqualTest, Uint8Quantized) +{ + std::vector x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedRescale) +{ + std::vector x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.5, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair x_quant_param = quantizationParams(F_MIN, F_MAX); + std::pair y_quant_param = quantizationParams(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); 
+ _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast) +{ + std::vector x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector ref_output_data{ + true, false, true, false, // Row 1 + true, true, true, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + GreaterEqual 
kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp new file mode 100644 index 0000000..971708b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/If.h" +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +static std::vector joinInputs(const Tensor *cond, + const std::vector &inputs) +{ + std::vector result{cond}; + result.insert(result.cend(), inputs.cbegin(), inputs.cend()); + return result; +} + +If::If(const Tensor *cond, const std::vector &inputs, std::vector outputs, + RuntimeGraph *then_graph, RuntimeGraph *else_graph) + : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph), + _else_graph(else_graph) +{ +} + +void If::configure() +{ + LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL); + LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1); + + for (RuntimeGraph *graph : {_then_graph, _else_graph}) + { + (void)graph; + LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1); + LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size()); + } +} + +void If::execute() const +{ + const bool cond_value = cond()->data()[0]; + + RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph; + const auto &graph_inputs = active_graph->getInputTensors(); + const auto &graph_outputs = active_graph->getOutputTensors(); + + // Copy kernel inputs to active graph inputs. 
+ for (size_t i = 0; i < getInputTensors().size() - 1; ++i) + { + LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type()); + graph_inputs[i]->resize(input(i)->shape()); + + const int32_t num_elements = input(i)->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(input(i)->element_type()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(graph_inputs[i]); + std::memcpy(graph_inputs[i]->data(), input(i)->data(), num_elements * element_size); + } + + active_graph->execute(); + + // Copy graph outputs to kernel outputs. + for (size_t i = 0; i < getOutputTensors().size(); ++i) + { + LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type()); + output(i)->resize(graph_outputs[i]->shape()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(output(i)); + + const int32_t num_elements = output(i)->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(output(i)->element_type()); + std::memcpy(output(i)->data(), graph_outputs[i]->data(), + num_elements * element_size); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.h b/compiler/luci-micro/luci-interpreter/src/kernels/If.h new file mode 100644 index 0000000..fa6ab37 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_IF_H +#define LUCI_INTERPRETER_KERNELS_IF_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class If : public Kernel +{ +public: + If(const Tensor *cond, const std::vector &inputs, std::vector outputs, + RuntimeGraph *then_graph, RuntimeGraph *else_graph); + + const Tensor *cond() const { return _inputs[0]; } + const Tensor *input(int index) const { return _inputs[1 + index]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + RuntimeGraph *const _then_graph; + RuntimeGraph *const _else_graph; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_IF_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp new file mode 100644 index 0000000..c5f4faf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/RuntimeModule.h" +#include "kernels/Add.h" +#include "kernels/If.h" +#include "kernels/Mul.h" +#include "kernels/TestUtils.h" + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class IfTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input1 = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *input2 = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *output = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input1, input2}); + graph->setOutputTensors({output}); + + AddParams params{}; + params.activation = Activation::NONE; + graph->addKernel(std::make_unique(input1, input2, output, params)); + + return graph; +} + +RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input1 = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, 
"")); + Tensor *input2 = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *output = graph->addTensor( + std::make_unique(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input1, input2}); + graph->setOutputTensors({output}); + + MulParams params{}; + params.activation = Activation::NONE; + graph->addKernel(std::make_unique(input1, input2, output, params)); + + return graph; +} + +TEST_F(IfTest, CondTrue) +{ + Tensor cond = makeInputTensor({1}, {true}, _memory_manager.get()); + Tensor input1 = makeInputTensor({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output), FloatArrayNear({6, 9})); +} + +TEST_F(IfTest, CondFalse) +{ + Tensor cond = makeInputTensor({1}, {false}, _memory_manager.get()); + Tensor input1 = makeInputTensor({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData(output), FloatArrayNear({5, 14})); +} + +TEST_F(IfTest, InvalidCondType_NEG) +{ + Tensor cond = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input1 = makeInputTensor({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(IfTest, InvalidCondElementNum_NEG) +{ + Tensor cond = makeInputTensor({2}, {false, true}, _memory_manager.get()); + Tensor input1 = makeInputTensor({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp new file mode 100644 index 0000000..22a329b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/InstanceNorm.h" + +#include "kernels/Utils.h" + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, + Tensor *output, const InstanceNormParams ¶ms) + : KernelWithParams({input, gamma, beta}, {output}, params) +{ +} + +void InstanceNorm::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) || + gamma()->shape().dim(0) == 1); + LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) || + beta()->shape().dim(0) == 1); + output()->resize(input()->shape()); +} + +void InstanceNorm::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void InstanceNorm::evalFloat() const +{ + float activation_min, activation_max; + calculateActivationRange(params().activation, &activation_min, &activation_max); + auto input_shape = getTensorShape(input()); + auto output_shape = getTensorShape(output()); + const int32_t batches = 
tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1); + const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2); + const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3); + const float *input_data = getTensorData(input()); + const float *gamma_data = getTensorData(gamma()); + auto gamma_shape = getTensorShape(gamma()); + bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1; + const float *beta_data = getTensorData(beta()); + auto beta_shape = getTensorShape(beta()); + bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1; + float *output_data = getTensorData(output()); + for (int32_t batch = 0; batch < batches; batch++) + { + for (int32_t channel = 0; channel < channels; channel++) + { + double sum = 0.0f; + double square_sum = 0.0f; + int32_t size = heights * widths; + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)]; + sum += input_val; + square_sum += (input_val * input_val); + } + } + double mean = sum / size; + double var = square_sum / size - mean * mean; + + double gamma = single_gamma ? gamma_data[0] : gamma_data[channel]; + double beta = single_beta ? 
beta_data[0] : beta_data[channel]; + double a = gamma / (std::sqrt(var + params().epsilon)); + double b = -mean * a + beta; + + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_value = + input_data[tflite::Offset(output_shape, batch, height, width, channel)]; + double output_value = input_value * a + b; + output_data[tflite::Offset(output_shape, batch, height, width, channel)] = + tflite::ActivationFunctionWithMinMax((float)output_value, activation_min, + activation_max); + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h new file mode 100644 index 0000000..a70a84e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H +#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class InstanceNorm : public KernelWithParams +{ +public: + InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output, + const InstanceNormParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *gamma() const { return _inputs[1]; } + const Tensor *beta() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp new file mode 100644 index 0000000..04400c3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernels/InstanceNorm.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class InstanceNormTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(InstanceNormTest, Simple) +{ + Tensor input_tensor = + makeInputTensor({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); +} + +TEST_F(InstanceNormTest, Single_gamma_beta) +{ + Tensor input_tensor = + makeInputTensor({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 
2})); +} + +TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG) +{ + Tensor input_tensor = + makeInputTensor({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor({3}, {1, 1, 1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor({3}, {2, 2, 2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp new file mode 100644 index 0000000..6422295 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/L2Normalize.h" +#include "kernels/Utils.h" + +#include "PALL2Normalize.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams ¶ms) + : KernelWithParams({input}, {output}, params) +{ +} + +void L2Normalize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (output()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.)); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 128); + } + LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE); + output()->resize(input()->shape()); +} + +void L2Normalize::execute() const +{ + switch (output()->element_type()) + { + case DataType::FLOAT32: + eval(0); + break; + case DataType::U8: + eval(input()->zero_point()); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template void L2Normalize::eval(int32_t zero_point) const +{ + tflite::L2NormalizationParams op_params{}; + op_params.input_zero_point = zero_point; + luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h new file mode 100644 index 0000000..6c7dac6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H +#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class L2Normalize : public KernelWithParams +{ +public: + L2Normalize(const Tensor *input, Tensor *output, const L2NormParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void eval(int32_t zero_point) const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp new file mode 100644 index 0000000..6f960e8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernels/L2Normalize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check(std::initializer_list input_shape, + std::initializer_list output_shape, + std::initializer_list input_data, + std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::pair quant_param = + quantizationParams(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? std::max(input_data) : 0.f); + + Tensor input_tensor = makeInputTensor( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 128., 128); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template class L2NormalizeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(L2NormalizeTest, DataTypes); + +TYPED_TEST(L2NormalizeTest, Simple) +{ + Check({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, + {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}); +} + +TEST(L2NormalizeTest, ActivationType_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor({1, 1, 1, 6}, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::RELU6; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 64., 127); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp new file mode 100644 index 0000000..5a88808 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/L2Pool2D.h" + +#include "kernels/Utils.h" + +#include "PALL2Pool2D.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) + : KernelWithParams({input}, {output}, params) +{ +} + +void L2Pool2D::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int batches = input()->shape().dim(0); + int height = input()->shape().dim(1); + int width = input()->shape().dim(2); + int channels_out = input()->shape().dim(3); + + // Matching GetWindowedOutputSize in TensorFlow. 
+ auto padding = params().padding; + int out_width, out_height; + out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1); + out_height = + computeOutputSize(padding, height, params().filter_height, params().stride_height, 1); + _padding_width = + computePadding(params().stride_width, 1, width, params().filter_width, out_width); + _padding_height = + computePadding(params().stride_height, 1, height, params().filter_height, out_height); + + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); + output()->resize({batches, out_height, out_width, channels_out}); +} + +void L2Pool2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + float activation_min, activation_max; + calculateActivationRange(params().activation, &activation_min, &activation_max); + tflite::PoolParams op_params; + op_params.stride_height = params().stride_height; + op_params.stride_width = params().stride_width; + op_params.filter_height = params().filter_height; + op_params.filter_width = params().filter_width; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.float_activation_min = activation_min; + op_params.float_activation_max = activation_max; + luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h new file mode 100644 index 0000000..d40f5f4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H +#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class L2Pool2D : public KernelWithParams +{ +public: + L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + int32_t _padding_height = 0; + int32_t _padding_width = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp new file mode 100644 index 0000000..7245456 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/L2Pool2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class L2Pool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(L2Pool2DTest, FloatNone) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{3.5, 6.5}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatRelu) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + -1, -6, 2, 4, // + -3, -2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{3.53553, 6.5}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatRelu1) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU_N1_TO_1; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0.353553, 1.0}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatRelu6) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU6; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0.353553, 6.0}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatPaddingSame) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::SAME; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{3.5, 6.5}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatPaddingSameStride) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::SAME; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; + // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05 + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatPaddingValidStride) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{3.5, 6.0, 6.5}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp new file mode 100644 index 0000000..3833a55 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LeakyRelu.h" + +#include "kernels/Utils.h" + +#include + +#include "PALLeakyRelu.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams ¶ms) + : KernelWithParams({input}, {output}, params) +{ +} + +void LeakyRelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + double alpha_multiplier = input()->scale() * params().alpha / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(input()->shape()); +} + +void LeakyRelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LeakyRelu::evalFloat() const +{ + tflite::LeakyReluParams op_params{}; + op_params.alpha = params().alpha; + luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +void LeakyRelu::evalQuantized() const +{ + tflite::LeakyReluParams op_params{}; + op_params.input_offset = input()->zero_point(); + op_params.output_offset = output()->zero_point(); + 
op_params.output_multiplier_alpha = _output_multiplier_alpha; + op_params.output_shift_alpha = _output_shift_alpha; + op_params.output_multiplier_identity = _output_multiplier_identity; + op_params.output_shift_identity = _output_shift_identity; + + tflite::reference_ops::QuantizeLeakyRelu( + op_params, getTensorShape(input()), getTensorData(input()), getTensorShape(output()), + getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h new file mode 100644 index 0000000..e66f404 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H +#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LeakyRelu : public KernelWithParams +{ +public: + LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier_alpha = 0; + int _output_shift_alpha = 0; + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp new file mode 100644 index 0000000..0f6263b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LeakyRelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data, + float alpha) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template <> +void Check(std::initializer_list input_shape, + std::initializer_list output_shape, + std::initializer_list input_data, + std::initializer_list output_data, float alpha) +{ + std::unique_ptr memory_manager = std::make_unique(); + const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255); + std::pair quant_param = quantizationParams(-8, 127.f / 16.f); + Tensor input_tensor = makeInputTensor( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + 
EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template class LeakReluTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(LeakReluTest, DataTypes); + +TYPED_TEST(LeakReluTest, Simple) +{ + Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -1.0f, // Row 2 + }, + /*alpha=*/0.5f); + + SUCCEED(); +} + +TEST(LeakReluTest, IvalidInputOutputType_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({2, 3}, + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LeakyReluParams params{}; + params.alpha = 0.5f; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp new file mode 100644 index 0000000..8d26ff2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Less.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Less::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Less::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Less::evalFloat() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +template void Less::evalInteger() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = 
x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Less::evalQuantized() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.h b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h new file mode 100644 index 0000000..e27bb68 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_H +#define LUCI_INTERPRETER_KERNELS_LESS_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Less : public Kernel +{ +public: + Less(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp new file mode 100644 index 0000000..8c59633 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Less.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 
3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // 
Row 3 + true, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + 
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, false, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, 
&output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, 
&output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp new file mode 100644 index 0000000..b474bc4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LessEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void LessEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void LessEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LessEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void LessEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + 
{ + tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void LessEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h new file mode 100644 index 0000000..f82ea90 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LessEqual : public Kernel +{ +public: + LessEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp new file mode 100644 index 0000000..b2e2fa7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LessEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = 
makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 
2 + false, true, true, // Row 3 + true, true, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 
1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, true, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual 
kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor 
= makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp new file mode 100644 index 0000000..a2bf442 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LocalResponseNormalization.h" + +#include "kernels/Utils.h" + +#include "PALLocalResponseNormalization.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LocalResponseNormalization::LocalResponseNormalization( + const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params) + : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params) +{ +} + +void LocalResponseNormalization::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void LocalResponseNormalization::execute() const +{ + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::LocalResponseNormalizationParams op_params; + op_params.range = params().radius; + op_params.bias = params().bias; + op_params.alpha = params().alpha; + op_params.beta = params().beta; + luci_interpreter_pal::LocalResponseNormalization( + op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h new file mode 100644 index 0000000..60408a1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H +#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams> +{ +public: + LocalResponseNormalization(const Tensor *input, Tensor *output, + const LocalResponseNormalizationParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp new file mode 100644 index 0000000..4a9d473 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LocalResponseNormalization.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LocalResponseNormalizationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LocalResponseNormalizationTest, SameAsL2Norm) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); +} + +TEST_F(LocalResponseNormalizationTest, WithAlpha) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})); +} + +TEST_F(LocalResponseNormalizationTest, WithBias) +{ + Tensor input_tensor = makeInputTensor( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 9.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})); +} + +TEST_F(LocalResponseNormalizationTest, SmallRadius) +{ + Tensor input_tensor = makeInputTensor( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 2; + params.bias = 9.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG) +{ + Tensor input_tensor = makeInputTensor( + {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) +{ + Tensor input_tensor = makeInputTensor( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp new file mode 100644 index 0000000..79c3153 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogSoftmax.h" + +#include "kernels/Utils.h" + +#include + +#include "PALLogSoftmax.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogSoftmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 255); + + tflite::SoftmaxParams params{}; + + params.table = _table; + params.beta = 1.0; + luci_interpreter_pal::PopulateSoftmaxLookupTable(¶ms, input()->scale(), params.beta); + } + output()->resize(input()->shape()); +} + +void LogSoftmax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LogSoftmax::evalFloat() const +{ + tflite::SoftmaxParams params{}; + tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +void LogSoftmax::evalQuantized() const +{ + const auto input_shape = getTensorShape(input()); + const auto output_shape = getTensorShape(output()); + const auto input_scale = input()->scale(); + uint8_t *output_data = getTensorData(output()); + const uint8_t *input_data = getTensorData(input()); + const float beta = 1.0; + + tflite::SoftmaxParams params{}; + + params.table = const_cast(_table); + params.zero_point = output()->zero_point(); + params.scale = output()->scale(); + + luci_interpreter_pal::InitializeParams(¶ms, input_scale, beta); + luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h new file mode 100644 index 0000000..18477fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H +#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogSoftmax : public Kernel +{ +public: + LogSoftmax(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + + float _table[256]; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp new file mode 100644 index 0000000..50dcd5c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogSoftmax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogSoftmaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(LogSoftmaxTest, Float) +{ + Shape input_shape{2, 4}; + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(LogSoftmaxTest, Uint8) +{ + float kMin = -10; + float kMax = 10; + float kLogSoftmaxQuantizedTolerance = 16. 
/ 256; + std::pair quant_param = quantizationParams(kMin, kMax); + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + std::vector ref_output_shape{2, 4}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111})); +} + +TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG) +{ + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor({2, 4}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG) +{ + std::pair quant_param = quantizationParams(-10, 10); + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 20. 
/ 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp new file mode 100644 index 0000000..8e72632 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalAnd::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalAnd::execute() const +{ + switch (input1()->element_type()) + { + case DataType::BOOL: + evalLogicalAnd(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalAnd::evalLogicalAnd() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output()), + [](bool x, bool y) { return x && y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h new file mode 100644 index 0000000..46b8899 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H +#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalAnd : public Kernel +{ +public: + LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalAnd() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp new file mode 100644 index 0000000..21b7951 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalAndTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(LogicalAndTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor1 = + makeInputTensor(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor(input_shape, {true, false, true, false}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAre(true, false, false, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, Broadcast) +{ + Tensor input_tensor1 = makeInputTensor({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor({1, 1, 1, 1}, {true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({1, 1, 1, 1}, {false}, _memory_manager.get()); + Tensor 
output_tensor = makeOutputTensor(DataType::S32); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalAndTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1, 1, 1, 1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp new file mode 100644 index 0000000..65ab961 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogicalNot::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void LogicalNot::execute() const +{ + switch (input()->element_type()) + { + case DataType::BOOL: + evalLogicalNot(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalNot::evalLogicalNot() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + bool *output_data = getTensorData(output()); + const bool *input_data = getTensorData(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = !input_data[i]; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h new file mode 100644 index 0000000..1608faf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H +#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalNot : public Kernel +{ +public: + LogicalNot(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalNot() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp new file mode 100644 index 0000000..3cbf27f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalNotTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(LogicalNotTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor = + makeInputTensor(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAre(false, true, true, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalNotTest, OutputTypeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalNotTest, InputTypeInvalid_NEG) +{ + Tensor input_tensor = + makeInputTensor({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp new file mode 100644 index 0000000..f289ca6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" + +#include "kernels/Utils.h" +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalOr::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalOr::execute() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output()), + [](bool x, bool y) { return x || y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h new file mode 100644 index 0000000..8860648 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H +#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalOr : public Kernel +{ +public: + LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp new file mode 100644 index 0000000..d65a69a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalOrTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(LogicalOrTest, Basic) +{ + Tensor input1_tensor = makeInputTensor({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1, 1, 1, 4}, {true, false, true, false}, + _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAre(true, false, true, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, Broadcast) +{ + Tensor input1_tensor = makeInputTensor({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + 
::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalOrTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1, 1, 1, 1}, {0}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp new file mode 100644 index 0000000..58e4f18 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "kernels/Logistic.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Validates the input/output types and sizes the output like the input.
+// For U8 the output scale must be exactly 1/256, and the lookup table is
+// built here so that execute() is a plain table lookup.
+void Logistic::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
+    populateLookupTable();
+  }
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type; anything but FLOAT32/U8 is rejected.
+void Logistic::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Logistic::evalFloat() const
+{
+  tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Maps every input byte through the table filled by populateLookupTable().
+void Logistic::evalQuantized() const
+{
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = getTableValue(input_data[i]);
+  }
+}
+
+// Fills the table: for each possible uint8 input value, dequantize it with the
+// input parameters, apply 1 / (1 + exp(-x)), requantize with the output
+// parameters and clamp the result to the uint8 range.
+void Logistic::populateLookupTable()
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+  const auto output_scale = static_cast<double>(output()->scale());
+  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val)
+  {
+    const float dequantized = input_scale * (val - input_zero_point);
+    const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
+    const float rescaled = std::round(transformed * inverse_scale);
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+                  static_cast<uint8_t>(val));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
new file mode 100644
index 0000000..31de6ad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Logistic kernel with one input and one output tensor.
+// Keeps a 256-entry byte table that the quantized path indexes by input value.
+class Logistic : public Kernel
+{
+public:
+  Logistic(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+  void populateLookupTable();
+  // Note the argument order: the stored value comes first, the table index second.
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
+
+private:
+  // One slot per possible uint8_t index; zero-initialized.
+  uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 0000000..5a1ea66
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Generic check: runs the kernel on tensors of element type T and compares
+// the raw output values and the output shape against the references.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+// Quantized check: input quantization is derived from the data range, the
+// output uses scale 1/256 and zero point 0, and the dequantized result is
+// compared with a tolerance of two output quantization steps.
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+                    std::initializer_list<int32_t> output_shape,
+                    std::initializer_list<float> input_data,
+                    std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale() * 2));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+// 89 evenly spaced inputs in [-10, 10] and the matching sigmoid values.
+TYPED_TEST(LogisticTest, Simple)
+{
+  Check<TypeParam>(
+    {89}, {89},
+    {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+     -8.6363636364,  -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+     -7.2727272727,  -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+     -5.9090909091,  -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+     -4.5454545455,  -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+     -3.1818181818,  -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+     -1.8181818182,  -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+     -0.4545454545,  -0.2272727273, 0.0000000000,  0.2272727273,  0.4545454545,  0.6818181818,
+     0.9090909091,   1.1363636364,  1.3636363636,  1.5909090909,  1.8181818182,  2.0454545455,
+     2.2727272727,   2.5000000000,  2.7272727273,  2.9545454545,  3.1818181818,  3.4090909091,
+     3.6363636364,   3.8636363636,  4.0909090909,  4.3181818182,  4.5454545455,  4.7727272727,
+     5.0000000000,   5.2272727273,  5.4545454545,  5.6818181818,  5.9090909091,  6.1363636364,
+     6.3636363636,   6.5909090909,  6.8181818182,  7.0454545455,  7.2727272727,  7.5000000000,
+     7.7272727273,   7.9545454545,  8.1818181818,  8.4090909091,  8.6363636364,  8.8636363636,
+     9.0909090909,   9.3181818182,  9.5454545455,  9.7727272727,  10.0000000000},
+    {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+     0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+     0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+     0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+     0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+     0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+     0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+     0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+     0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+     0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+     0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+     0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+     0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+     0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+     0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+// Negative case: FLOAT32 input paired with a U8 output must be rejected by configure().
+TEST(LogisticTest, InvalidInputOutputType_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Shape input_shape = {1};
+  std::vector<float> input_data{10};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: a U8 output whose scale is 1/255 instead of 1/256 must be rejected.
+TEST(LogisticTest, InvalidQuantParam_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Shape input_shape = {2};
+  std::vector<float> input_data{-10, 10};
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 0000000..8d9760f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <cmath>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+
+// Builds the part of tflite::PoolParams that is the same for every element
+// type: padding, strides and filter size. The caller fills in the activation
+// bounds that match its element type.
+tflite::PoolParams makePoolParams(const Pool2DParams &pool, int32_t padding_height,
+                                  int32_t padding_width)
+{
+  tflite::PoolParams params{};
+  params.padding_values.height = padding_height;
+  params.padding_values.width = padding_width;
+  params.stride_height = pool.stride_height;
+  params.stride_width = pool.stride_width;
+  params.filter_height = pool.filter_height;
+  params.filter_width = pool.filter_width;
+  return params;
+}
+
+} // namespace
+
+MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+// Validates the tensors, computes output size and padding, and resizes the output.
+void MaxPool2D::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // Checked (not asserted) so that a non-4D input is also rejected in builds
+  // where assert() is compiled out; dim(0)..dim(3) are read right below.
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  const Shape &input_shape = input()->shape();
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t depth = input_shape.dim(3);
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+  _padding_height =
+    computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+  _padding_width =
+    computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+  output()->resize({batches, output_height, output_width, depth});
+  // Quantized inputs must share quantization parameters with the output;
+  // S16 additionally requires both zero points to be 0.
+  if (input()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+  }
+  else if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+}
+
+// Dispatches on the input element type; unsupported types throw.
+void MaxPool2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalSInt16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void MaxPool2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::PoolParams params = makePoolParams(_params, _padding_height, _padding_width);
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void MaxPool2D::evalQuantized() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::PoolParams params = makePoolParams(_params, _padding_height, _padding_width);
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void MaxPool2D::evalSInt16() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::PoolParams params = makePoolParams(_params, _padding_height, _padding_width);
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_integer_ops::MaxPool(
+    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+    getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
new file mode 100644
index 0000000..bb76663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Max-pooling kernel with one input and one output tensor, parameterized by Pool2DParams.
+class MaxPool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+  MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One evaluation routine per supported element type.
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalSInt16() const;
+
+private:
+  // Padding values; zero-initialized here.
+  int32_t _padding_height{};
+  int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 0000000..44f2a22
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: each test case starts with a freshly created memory manager.
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// 2x3 filter, stride (1, 2), VALID padding on a 3x5 input.
+// The bottom-right window's maximum is 7, which RELU6 caps at 6.
+TEST_F(MaxPool2DTest, Float)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<float> input_data{
+    1,  -1, 0,  -2, 2,  //
+    -7, -6, -5, -4, -3, //
+    5,  4,  3,  6,  7,  //
+  };
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    1, 2, //
+    5, 6, //
+  };
+  std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Quantized variant: input and output share the same quantization parameters.
+TEST_F(MaxPool2DTest, Uint8)
+{
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+  std::vector<float> input_data{
+    0,  -6, 12, 4, //
+    -3, -2, 10, 7, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 2;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.0, 6.0};
+  std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// S16 variant of the Float case: same data and parameters, scale 0.2, zero point 0.
+TEST_F(MaxPool2DTest, SInt16)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+  std::vector<float> input_data{
+    1,  -1, 0,  -2, 2,  //
+    -7, -6, -5, -4, -3, //
+    5,  4,  3,  6,  7,  //
+  };
+  std::vector<float> ref_output_data{
+    1, 2, //
+    5, 6, //
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 0000000..b102b5e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+// Requires both inputs and the output to share one element type, then sizes
+// the output to the broadcast of the two input shapes.
+void Maximum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+// Dispatches on the element type; only FLOAT32 and U8 are handled.
+void Maximum::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalMaximum<float>();
+      break;
+    case DataType::U8:
+      evalMaximum<uint8_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Element-wise std::max over the two inputs through the broadcasting helper.
+template <typename T> inline void Maximum::evalMaximum() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()),
+                        [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 0000000..3c99e69
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * 
Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Binary kernel with two input tensors and one output tensor.
+class Maximum : public Kernel
+{
+public:
+  Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+  const Tensor *input1() const { return _inputs[0]; }
+  const Tensor *input2() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Typed evaluation routine; T is the element type of the tensors.
+  template <typename T> inline void evalMaximum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 0000000..e4a505b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: each test case starts with a freshly created memory manager.
+class MaximumTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Element-wise maximum of two FLOAT32 tensors of identical shape.
+TEST_F(MaximumTest, Float)
+{
+  Shape input_shape{3, 1, 2};
+  std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+  std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Element-wise maximum of two U8 tensors of identical shape.
+TEST_F(MaximumTest, Uint8)
+{
+  Shape input_shape{3, 1, 2};
+  std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+  std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  // Both inputs are {3, 1, 2}, so the broadcast output shape is {3, 1, 2} as well.
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 0000000..8e65e0d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Copies the first num_axes entries of axes_data into params->axis and sets
+// the remaining slots (up to index 3) to 1. Writes params->axis[0..3] only
+// when num_axes <= 4; callers must guarantee that bound.
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+  params->axis_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    params->axis[i] = static_cast<int16_t>(axes_data[i]);
+  }
+  for (int i = num_axes; i < 4; ++i)
+  {
+    params->axis[i] = 1;
+  }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+  // Start from the raw axis count and subtract one for every entry that
+  // repeats an earlier axis (negative axes are normalized by adding the rank).
+  int reduction_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+    assert(current >= 0 && current < input_num_dims);
+    for (int j = 0; j < i; j++)
+    {
+      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+      // This checks for duplicate axis
+      if (current == previous)
+      {
+        --reduction_count;
+        break;
+      }
+    }
+  }
+  return reduction_count;
+}
+
+// Returns true when dimension idx is named by any entry of axes_data, either
+// directly or as a negative axis (entry + input_num_dims == idx).
+static bool isReducedAxis(const int32_t *axes_data, int num_axes, int input_num_dims, int idx)
+{
+  for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+  {
+    if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Computes the output shape of the reduction.
+// With keep_dims the rank is preserved and reduced dimensions become 1;
+// otherwise reduced dimensions are dropped. A rank-0 input yields Shape(0).
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+                            bool keep_dims)
+{
+  int input_num_dims = input_shape.num_dims();
+  if (input_num_dims == 0)
+  {
+    return Shape(0);
+  }
+
+  if (keep_dims)
+  {
+    Shape output_shape(input_num_dims);
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      if (isReducedAxis(axes_data, num_axes, input_num_dims, idx))
+      {
+        output_shape.dim(idx) = 1;
+      }
+      else
+      {
+        output_shape.dim(idx) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+  else
+  {
+    int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+    Shape output_shape(input_num_dims - num_reduce_axes);
+    // Number of reduced dimensions seen so far; shifts the write position left.
+    int num_skip_axes = 0;
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      if (isReducedAxis(axes_data, num_axes, input_num_dims, idx))
+      {
+        ++num_skip_axes;
+      }
+      else
+      {
+        output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+}
+
+// Inputs: {input, axes}. Outputs: {output, temp_index, resolved_axes, temp_sum};
+// the last three are scratch tensors used only by the generic reduction path.
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+                                    params)
+{
+}
+
+// Validates types, resizes the output, and decides whether the scratch
+// tensors are needed (they are not for the 4D keep_dims mean over axes 1 and 2).
+void Mean::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+  if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+  // Checked (not asserted): resolveAxes() stores into axis slots 0..3, so a
+  // larger count must be rejected even when assert() is compiled out.
+  LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+  output()->resize(output_shape);
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+  _need_temporaries = !(
+    _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+    ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+  if (_need_temporaries)
+  {
+    temp_index->resize(Shape(input_num_dims));
+    resolved_axes->resize(Shape(num_axes));
+    temp_sum->resize(output()->shape());
+  }
+  else
+  {
+    temp_index->set_allocatable(false);
+    resolved_axes->set_allocatable(false);
+    temp_sum->set_allocatable(false);
+  }
+}
+
+// Dispatches on the input element type; unsupported types throw.
+void Mean::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Mean::evalFloat() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+      ((params.axis[0] == 1 && params.axis[1] == 2) ||
+       (params.axis[0] == 2 && params.axis[1] == 1)))
+  {
+    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<float>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<float>(temp_sum));
+  }
+}
+
+void Mean::evalQuantized() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && + ((params.axis[0] == 1 && params.axis[1] == 2) || + (params.axis[0] == 2 && params.axis[1] == 1))) + { + tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData(input()), + input()->zero_point(), input()->scale(), getTensorShape(output()), + getTensorData(output()), output()->zero_point(), + output()->scale()); + } + else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale()) + { + tflite::reference_ops::Mean(getTensorData(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData(temp_index), getTensorData(resolved_axes), + getTensorData(temp_sum)); + } + else + { + tflite::reference_ops::QuantizedMeanOrSum<>( + getTensorData(input()), input()->zero_point(), input()->scale(), + getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData(output()), output()->zero_point(), output()->scale(), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, + _params.keep_dims, getTensorData(temp_index), getTensorData(resolved_axes), + getTensorData(temp_sum), + /*compute_sum=*/false); + } +} + +void Mean::evalQuantizedS16() const +{ + const auto *input_data = getTensorData(input()); + auto *output_data = getTensorData(output()); + + const Shape &input_shape = input()->shape(); + const Shape &output_shape = output()->shape(); + + const auto *axes_data = getTensorData(axes()); + const int num_axes = axes()->shape().num_elements(); + + constexpr int32_t output_min = -std::numeric_limits::max(); + constexpr int32_t output_max = std::numeric_limits::max(); + + // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 && + ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1))) + { + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + assert(output_shape.num_dims() == 4); + assert(output_shape.dim(0) == batches); + assert(output_shape.dim(1) == 1); + assert(output_shape.dim(2) == 1); + assert(output_shape.dim(3) == depth); + + const double real_multiplier = + static_cast(input()->scale()) / static_cast(output()->scale()); + + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + const int32_t num_elements_in_axes = input_height * input_width; + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t c = 0; c < depth; ++c) + { + int32_t acc = 0; + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)]; + } + } + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + // Divide by the number of elements rounding to the nearest integer. + scaled_acc = scaled_acc > 0 + ? 
(scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes + : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes; + + scaled_acc = std::max(scaled_acc, output_min); + scaled_acc = std::min(scaled_acc, output_max); + + output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc; + } + } + } + else + { + throw std::runtime_error("Unsupported configuration."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h new file mode 100644 index 0000000..ed07ae5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H +#define LUCI_INTERPRETER_KERNELS_MEAN_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class Mean : public KernelWithParams +{ +public: + Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + bool _need_temporaries = false; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MEAN_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp new file mode 100644 index 0000000..d2c0093 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Mean.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MeanTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(MeanTest, FloatKeepDims) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{10.5, 12.5, 14.5}; + std::initializer_list ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, FloatKeepDims4DMean) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector axis_data{1, 2}; + Tensor input_tensor = + makeInputTensor({2, 2, 
3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{6, 7, 18, 19}; + std::initializer_list ref_output_shape{2, 1, 1, 2}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, FloatNotKeepDims) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + 
_memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{12, 13}; + std::initializer_list ref_output_shape{2}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, Uint8KeepDims) +{ + float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); + std::vector input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + std::pair quant_param = quantizationParams(-1.0f, 1.0f); + + std::vector axis_data{1}; + Tensor input_tensor = makeInputTensor({3, 2}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0.3, 0.35, 0.55}; + std::initializer_list ref_output_shape{3, 1}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, Uint8NotKeepDims) +{ + float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); + std::vector input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + std::pair quant_param = 
quantizationParams(-1.0f, 1.0f); + + std::vector axis_data{1}; + Tensor input_tensor = makeInputTensor( + {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + ReducerParams params{}; + params.keep_dims = false; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0.4, 0.4}; + std::initializer_list ref_output_shape{1, 2}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, SInt16KeepDims4D) +{ + std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + std::vector axes_data{1, 2}; + std::vector ref_output_data{6, 7, 18, 19}; + + Tensor input_tensor = + makeInputTensor({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get()); + Tensor axes_tensor = makeInputTensor({2}, axes_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + ReducerParams params{}; + params.keep_dims = true; 
+ + Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp new file mode 100644 index 0000000..5d3dcde --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Minimum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Minimum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Minimum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMinimum(); + break; + case DataType::U8: + evalMinimum(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template inline void Minimum::evalMinimum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output()), + [](T x, T y) { return std::min(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h new file mode 100644 index 0000000..5ff4035 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H +#define LUCI_INTERPRETER_KERNELS_MINIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Minimum : public Kernel +{ +public: + Minimum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template inline void evalMinimum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp new file mode 100644 index 0000000..9a14364 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Minimum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MinimumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(MinimumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(MinimumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector input_data1{1, 0, 2, 11, 2, 23}; + std::vector input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = + makeInputTensor(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp 
b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp new file mode 100644 index 0000000..bae1eac --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/MirrorPad.h" + +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms) + : KernelWithParams({input, paddings}, {output}, params) +{ +} + +void MirrorPad::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + // Paddings shape should be [N, 2]. 
+ assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +template +inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output); + +void MirrorPad::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + MirrorPadImpl(*input(), *paddings(), params().mode, *output()); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits::min()); + assert(output()->zero_point() <= std::numeric_limits::max()); + + MirrorPadImpl(*input(), *paddings(), params().mode, *output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +template +inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output) +{ + auto const input_dims = input.shape().num_dims(); + auto const input_data = input.data(); + auto const paddings_data = paddings.data(); + auto const output_data = output.data(); + + auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1; + auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1; + auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1; + auto const input_d = input.shape().dim(input_dims - 1); + + auto const input_h_offset = input_d * input_w; + auto const input_b_offset = input_h_offset * input_h; + + auto const output_b = input_dims > 3 ? 
output.shape().dim(input_dims - 4) : 1; + auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1; + auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1; + auto const output_d = output.shape().dim(input_dims - 1); + + auto const left_b_pad = paddings_data[2 * (input_dims - 4)]; + auto const left_h_pad = paddings_data[2 * (input_dims - 3)]; + auto const left_w_pad = paddings_data[2 * (input_dims - 2)]; + auto const left_d_pad = paddings_data[2 * (input_dims - 1)]; + + auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1]; + auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1]; + auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1]; + auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1]; + + const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; }; + const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h, + auto b) { + return d + w * input_d + h * input_h_offset + b * input_b_offset; + }; + + const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) { + bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1; + return positive_mod(reflected ? 
input + left_pad - i - 1 : i - left_pad, input); + }; + + const T *in_ptr = input_data; + T *out_ptr = output_data; + + for (int32_t b = 0; b < output_b; ++b) + { + for (int32_t h = 0; h < output_h; ++h) + { + for (int32_t w = 0; w < output_w; ++w) + { + for (int32_t d = 0; d < output_d; ++d) + { + if (b < left_b_pad || b >= output_b - right_b_pad || // + h < left_h_pad || h >= output_h - right_h_pad || // + w < left_w_pad || w >= output_w - right_w_pad || // + d < left_d_pad || d >= output_d - right_d_pad) + { + if (mode == MirrorPadMode::REFLECT) + { + *out_ptr++ = input_data[offset_index( + positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w), + positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))]; + } + else + { + *out_ptr++ = input_data[offset_index( + symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w), + symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))]; + } + } + else + { + *out_ptr++ = *in_ptr++; + } + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h new file mode 100644 index 0000000..d3e6e85 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H +#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class MirrorPad : public KernelWithParams +{ +public: + MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp new file mode 100644 index 0000000..740d8cb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MirrorPad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MirrorPadTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode) + { + MirrorPadParams params{}; + params.mode = mode; + + MirrorPad kernel(&input, &padding, &output, params); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + } + + std::unique_ptr _memory_manager; +}; + +TEST_F(MirrorPadTest, FloatReflect) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; // + std::initializer_list ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, 
_memory_manager.get()); + Tensor padding_tensor = + makeInputTensor(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 3.0, 3.0, 4.0, 4.0, 3.0, // + 3.0, 3.0, 4.0, 4.0, 3.0}; // + std::initializer_list ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric2Dim) +{ + Shape input_shape = {3, 1}; + Shape padding_shape = {2, 2}; + + std::vector input_data{1.0f, 2.0f, 3.0f}; + std::vector padding_data{1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0}; + std::initializer_list ref_output_shape{6, 1}; + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Reflect) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair quant_param = quantizationParams(0.0f, 6.0f); + + std::vector input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor( + input_shape, quant_param.first, quant_param.second, input_data, 
_memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector ref_output_data{ + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + }; + std::initializer_list ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Symmetric) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair quant_param = quantizationParams(0.0f, 6.0f); + + std::vector input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector ref_output_data{ + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + }; + std::initializer_list ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + 
FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, UnsupportedDim_NEG) +{ + Tensor input_tensor = + makeInputTensor({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +TEST_F(MirrorPadTest, InvalidInputType_NEG) +{ + Tensor input_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor padding_tensor = makeInputTensor({1, 2}, {0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp new file mode 100644 index 0000000..531fb4f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Mul.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include "PALMul.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms) + : KernelWithParams({input1, input2}, {output}, params) +{ +} + +void Mul::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type()); + if (input1()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1) + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); + } + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Mul::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Mul::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +template void 
Mul::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +void Mul::evalQuantizedS16() const +{ + const auto input1_scale = static_cast(input1()->scale()); + const auto input2_scale = static_cast(input2()->scale()); + const auto output_scale = static_cast(output()->scale()); + + const double real_multiplier = input1_scale * input2_scale / output_scale; + + int32_t output_multiplier; + int output_shift; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val, + int16_t input2_val) { + int32_t output = static_cast(input1_val) * static_cast(input2_val); + output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift); + output = std::max(output, activation_min); + output = std::min(output, activation_max); + return static_cast(output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output()), fn); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h new file mode 100644 index 0000000..c0cf817 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MUL_H +#define LUCI_INTERPRETER_KERNELS_MUL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class Mul : public KernelWithParams +{ +public: + Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantizedS16() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp new file mode 100644 index 0000000..fc0e606 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) 
2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Mul.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(MulTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> test_outputs = { + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + std::vector input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +template void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + dtype max_value = std::numeric_limits::max(); + dtype res_max = max_value - max_value % 10; + + std::vector> test_outputs = { + {8, 0, 20, 0, 4, 30, // + 16, 0, 40, 3, 8, 0, // + 0, 0, 0, 6, 0, 0, // + 4, 0, 10, 9, 2, 0, // + 40, 0, 100, 0, 20, 150, // + 28, 0, 70, 0, 14, 
res_max}, + {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max}, + {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2, + 70, res_max}, + {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}}; + std::vector input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10}; + std::vector input2_data{4, 0, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(MulTest, SInt64) +{ + checkInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt32) +{ + checkInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector> ref_outputs = { + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = 
makeInputTensor(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(MulTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(MulTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + MulParams params{}; + params.activation = Activation::NONE; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp new file mode 100644 index 0000000..c6fe08a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Neg.h" +#include "kernels/Utils.h" + +#include "PALNeg.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Neg::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void Neg::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Neg::evalFloat() const +{ + luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h new file mode 100644 index 0000000..69fa1a1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_NEG_H +#define LUCI_INTERPRETER_KERNELS_NEG_H + +#include "core/Kernel.h" +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class Neg : public Kernel +{ +public: + Neg(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp new file mode 100644 index 0000000..8b2bc1a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Neg.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Neg kernel(&input_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(NegTest, FloatSimple) +{ + Check(/*input_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, -1.0f, -3.0f, // Row 1 + -1.0f, 1.0f, 2.0f, // Row 2 + }); + + SUCCEED(); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp new file mode 100644 index 0000000..54e5eee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void NotEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void NotEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void NotEqual::evalFloat() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqual(op_params, 
getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template void NotEqual::evalInteger() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void NotEqual::evalQuantized() const +{ + const auto x_data = getTensorData(x()); + const auto y_data = getTensorData(y()); + auto output_data = getTensorData(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h new file mode 100644 index 0000000..d2aafe8 --- 
/dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class NotEqual : public Kernel +{ +public: + NotEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp new file mode 100644 index 0000000..45bf402 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class NotEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(NotEqualTest, FloatSimple) +{ + std::vector x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector ref_output_data{ + true, false, true, // Row 1 + true, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(NotEqualTest, FloatBroardcast) +{ + std::vector x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector 
y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector ref_output_data{ + true, false, true, // Row 1 + true, true, true, // Row 2 + true, true, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = std::numeric_limits::max(); + std::vector x_data{min_value, 2, max_value}; + + std::vector y_data{min_value, -2, max_value}; + + std::vector ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + dtype min_value = std::numeric_limits::min(); + dtype max_value = std::numeric_limits::max(); + std::vector x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // 
Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + true, false, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(NotEqualTest, Int32) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(NotEqualTest, Int64) +{ + checkIntegerSimple(_memory_manager.get()); + checkIntegerBroadcast(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(NotEqualTest, Uint8Quantized) +{ + std::vector x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector ref_output_data{ + true, false, true, true, // Row 1 + true, false, false, true, // Row 2 + }; + + std::pair x_quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair y_quant_param = quantizationParams(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Uint8QuantizedBroadcast) +{ + std::vector x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector ref_output_data{ + true, true, true, true, // Row 1 + true, true, false, true, // Row 2 + true, true, true, true, // Row 3 + false, false, false, false, // Row 4 + }; + + std::pair quant_param = quantizationParams(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor({3}, {1, 2, 3}, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp new file mode 100644 index 0000000..4d3e5f2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/OneHot.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template +void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor, + const Tensor *off_value_tensor, int32_t depth, int32_t axis, + Tensor *output_tensor) +{ + // define input shape and correct axis + auto const &input_shape = indices_tensor->shape(); + axis = axis == -1 ? 
input_shape.num_dims() : axis; + + // TODO support other integer input types + auto const *indices = getTensorData(indices_tensor); + auto const on_value = getTensorData(on_value_tensor)[0]; + auto const off_value = getTensorData(off_value_tensor)[0]; + auto *output = getTensorData(output_tensor); + + // prefix_dim_size == # of elements before the axis + // depth == # of elements per axis + // suffix_dim_size == # of elements after the axis + auto prefix_dim_size = 1; + for (int32_t i = 0; i < axis; ++i) + { + prefix_dim_size *= input_shape.dim(i); + } + assert(prefix_dim_size > 0); + auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size; + + // View the indices as a matrix of size: + // prefix_dim_size x suffix_dim_size + // View the output as a matrix of size: + // prefix_dim_size x depth x suffix_dim_size + // Then the output is: + // output(i, j, k) == (indices(i, k) == j) ? on : off + for (int32_t i = 0; i < prefix_dim_size; ++i) + for (int32_t j = 0; j < depth; ++j) + for (int32_t k = 0; k < suffix_dim_size; ++k, ++output) + *output = indices[i * suffix_dim_size + k] == j ? 
on_value : off_value; +} + +} // namespace + +OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params) + : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params) +{ + // Do nothing +} + +void OneHot::configure() +{ + // check types + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type()); + + // check shape dependent parameters + LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims()); + + // define parameters that affect the output shape + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const &input_shape = indices()->shape(); + auto const input_dims = input_shape.num_dims(); + auto const axis = params().axis == -1 ? 
input_dims : params().axis; + + // define output shape + Shape output_shape(input_shape.num_dims() + 1); + { + for (int32_t d = 0; d < axis; ++d) + output_shape.dim(d) = input_shape.dim(d); + + output_shape.dim(axis) = depth_value; + + for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d) + output_shape.dim(d) = input_shape.dim(d - 1); + } + + // reshape output + output()->resize(output_shape); +} + +void OneHot::execute() const +{ + auto const depth_value = getTensorData(depth())[0]; + auto const axis = params().axis; + + switch (output()->element_type()) + { + case loco::DataType::FLOAT32: + OneHotComputeImpl(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::U8: + OneHotComputeImpl(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::S16: + OneHotComputeImpl(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + default: + // TODO Support other data types + throw std::runtime_error("Not supported, yet!"); + break; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h new file mode 100644 index 0000000..572f857 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H +#define LUCI_INTERPRETER_KERNELS_ONEHOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class OneHot : public KernelWithParams<OneHotParams> +{ +public: + OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params); + + const Tensor *indices() const { return _inputs[0]; } + const Tensor *depth() const { return _inputs[1]; } + const Tensor *on_value() const { return _inputs[2]; } + const Tensor *off_value() const { return _inputs[3]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp new file mode 100644 index 0000000..45b6968 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/OneHot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list depth_data, + std::initializer_list on_value_data, std::initializer_list off_value_data, + int32_t axis, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + constexpr auto input_type = getElementType(); + constexpr auto output_type = getElementType(); + + Tensor input_tensor = makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor depth_tensor = makeInputTensor({}, depth_data, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor({}, on_value_data, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor({}, off_value_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + OneHotParams params{}; + params.axis = axis; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template class OneHotTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(OneHotTest, DataTypes); + +TYPED_TEST(OneHotTest, BasicPattern) +{ + // axis 0 + Check(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/0, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 1, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 1, 0, // + 
0, 1, 0, // + }); + // axis 1 + Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/1, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + + 0, 0, 1, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + }); + // axis -1 + Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/-1, + /*output_data=*/ + { + 1, 0, 0, 0, // + 0, 0, 0, 1, // + 0, 0, 0, 0, // + + 0, 0, 0, 0, // + 0, 0, 0, 1, // + 1, 0, 0, 0, // + }); +} + +TEST(OneHotTest, UnsupportedInputType_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + // input type should be integer + Tensor input_tensor = makeInputTensor({1}, {0}, memory_manager.get()); + + Tensor depth_tensor = makeInputTensor({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + OneHotParams params = {-1}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, OutputTypeMismatch_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = makeInputTensor({1}, {0}, memory_manager.get()); + Tensor depth_tensor = makeInputTensor({}, {1}, memory_manager.get()); + + // type of on_value, off_value and output_tensor should be same + Tensor on_value_tensor = makeInputTensor({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + OneHotParams params = {-1}; + + OneHot 
kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, InvalidAxis_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = makeInputTensor({1}, {0}, memory_manager.get()); + Tensor depth_tensor = makeInputTensor({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + // axis should be in [-1, input_shape.rank] + OneHotParams params = {-2}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp new file mode 100644 index 0000000..5a6b05c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/PRelu.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +PRelu::~PRelu() +{ + // Destructor declared to delete vector of alpha quantized data properly +} + +void PRelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1); + LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1); + + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives + _alpha_multipliers.resize(1); + double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier, + &_alpha_multipliers[0].shift); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + else if (input()->element_type() == DataType::S16) + { + // Common check for correctness of quant params + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel) + { + LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0); + } + // PRelu specific checks for CWQ + LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1); + LUCI_INTERPRETER_CHECK(static_cast(alpha()->scales().size()) == + alpha()->shape().dim(alpha()->quantized_dimension())); + LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() == + input()->shape().dim(input()->shape().num_dims() - 1)); + + // all dimension of alpha 
except last one should be size 1 + for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim) + { + LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1); + } + + std::vector real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale()); + + _alpha_multipliers = quantizeMultipliers(real_multipliers); + + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape())); +} + +void PRelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void PRelu::evalFloat() const +{ + const auto input_data = getTensorData(input()); + const auto alpha_data = getTensorData(alpha()); + const auto size = getTensorShape(input()).FlatSize(); + auto output_data = getTensorData(output()); + + auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; }; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow( + getTensorShape(input()), getTensorData(input()), getTensorShape(alpha()), + getTensorData(alpha()), getTensorShape(output()), getTensorData(output()), + PReluFunc); + } + else + { + for (auto i = decltype(size){0}; i < size; ++i) + { + if (input_data[i] >= 0) + output_data[i] = input_data[i]; + else + output_data[i] = input_data[i] * alpha_data[i]; + } + } +} + +void PRelu::evalQuantized() const +{ + tflite::PreluParams op_params{}; + + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'. 
+ op_params.output_offset = output()->zero_point(); + op_params.output_shift_1 = _output_shift_identity; + op_params.output_multiplier_1 = _output_multiplier_identity; + op_params.output_shift_2 = _alpha_multipliers[0].shift; + op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastPrelu4DSlow( + op_params, getTensorShape(input()), getTensorData(input()), getTensorShape(alpha()), + getTensorData(alpha()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Prelu( + op_params, getTensorShape(input()), getTensorData(input()), getTensorShape(alpha()), + getTensorData(alpha()), getTensorShape(output()), getTensorData(output())); + } +} + +static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val, + const ChannelQuantMultipliers &identity_mult, + const ChannelQuantMultipliers &alpha_mult) +{ + constexpr int32_t quantized_min = std::numeric_limits::min(); + constexpr int32_t quantized_max = std::numeric_limits::max(); + + const int32_t output_val = + input_val >= 0 + ? 
tflite::MultiplyByQuantizedMultiplier(static_cast(input_val), + identity_mult.multiplier, identity_mult.shift) + : tflite::MultiplyByQuantizedMultiplier(static_cast(input_val * alpha_val), + alpha_mult.multiplier, alpha_mult.shift); + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val)); + return clamped_output; +} + +void PRelu::evalQuantizedS16() const +{ + // Note that this kernel assumes alpha is CWQ + tflite::RuntimeShape input_shape = getTensorShape(input()); + const int16_t *input_data = input()->data(); + const int16_t *alpha_data = alpha()->data(); + int16_t *output_data = output()->data(); + + const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity}; + + const int last_dim = input()->shape().num_dims() - 1; + + int32_t outer_dims_size = 1; + for (int i = 0; i < last_dim; ++i) + outer_dims_size *= input_shape.Dims(i); + int32_t quant_dim_size = input_shape.Dims(last_dim); + + for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims) + for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel) + { + const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel]; + size_t offset = static_cast(outer_dims) * static_cast(quant_dim_size); + offset += quant_channel; + + output_data[offset] = + evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h new file mode 100644 index 0000000..f7735d4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H +#define LUCI_INTERPRETER_KERNELS_PRELU_H + +#include "core/Kernel.h" +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +class ChannelQuantMultipliers; + +class PRelu : public Kernel +{ +public: + PRelu(const Tensor *input, const Tensor *alpha, Tensor *output); + + ~PRelu(); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *alpha() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + std::vector _alpha_multipliers; + // TODO merge this into one ChannelQuantMultiplier object + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PRELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp new file mode 100644 index 0000000..6d97382 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PRelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list alpha_shape, + std::initializer_list output_shape, std::initializer_list input_data, + std::initializer_list alpha_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor(alpha_shape, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(PReluTest, FloatSimple) +{ + Check(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*alpha_data=*/ + { + 0.0f, 0.5f, 0.1f, // Row 1 + 0.0f, 0.5f, 0.1f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -0.2f, // Row 2 + }); + + SUCCEED(); +} + +TEST(PReluTest, 
FloatBroadcast) +{ + Check(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3}, + /*output_shape=*/{1, 2, 2, 3}, + /*input_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 2, Column 2 + }, + /*alpha_data=*/ + {0.0f, 1.0f, 2.0f}, + /*output_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 2, Column 2 + }); + + SUCCEED(); +} + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PReluTest, Uint8Simple) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; + std::vector alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; + std::vector ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; + + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair quant_param = quantizationParams(-1.0f, 1.0f); + + Tensor input_tensor = makeInputTensor( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor( + {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1})); + + SUCCEED(); +} + +TEST(PReluTest, Uint8Broadcast) +{ + std::vector input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + 
}; + std::vector alpha_data{0.0f, 0.5f, -0.5f}; + std::vector ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + std::vector ref_quant_output_data{ + 128, 128, 128, // Row 1, Column 1 + 192, 192, 192, // Row 1, Column 2 + 128, 64, 192, // Row 2, Column 1 + 128, 112, 144 // Row 2, Column 2 + }; + float kQuantizedTolerance = 2 * (1. / 256); + const float kMin = -1; + const float kMax = 127.f / 128.f; + std::pair quant_param = quantizationParams(kMin, kMax); + + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor( + {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor( + {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_quant_output_data)); +} + +TEST(PReluTest, SInt16_LWQ_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + // Rewrite this test in case layer-wise quantization for sint16 is supported + std::vector input_data(6); // data is not important + std::vector alpha_data(6); + + Tensor input_tensor = + makeInputTensor({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + 
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_Simple) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector alpha_data{0.5f, 0.25f}; + std::vector ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector alpha_scales{0.05f, 0.025f}; + std::vector zerop{0, 0}; + Tensor input_tensor = + makeInputTensor({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data(6); // data is not important + std::vector alpha_data(6); + + std::vector alpha_scales{0.25f, 0.05f}; + std::vector zerop{0, 0}; + Tensor input_tensor = + makeInputTensor({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1, 1, 3, 2}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data(6); // data is not important + std::vector alpha_data(6); + + std::vector alpha_scales{0.25f}; + std::vector zerop{0}; + Tensor input_tensor = + 
makeInputTensor({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1, 1, 1, 2}, alpha_scales, zerop, 1, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape1) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector alpha_data{0.5f, 0.25f}; + std::vector ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector alpha_scales{0.05f, 0.025f}; + std::vector zerop{0, 0}; + Tensor input_tensor = + makeInputTensor({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1, 1, 2}, alpha_scales, zerop, 2, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape2) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector alpha_data{0.0f, 0.5f, -0.5f}; + std::vector ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + + std::vector alpha_scales{1.f, 0.05f, 0.1f}; + std::vector zerop{0, 0, 0}; + Tensor input_tensor = + makeInputTensor({1, 2, 
2, 3}, 0.01, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1, 1, 1, 3}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, Input_Output_Type_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1}, {1.f}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Alpha_Type_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input_tensor = makeInputTensor({1}, {1}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST(PReluTest, Input_Output_U8_CWQ_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + 
std::vector scales{1.f, 1.f}; + std::vector zerop{0, 0}; + std::vector dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Output_S16_CWQ_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector scales{1.f, 1.f}; + std::vector zerop{0, 0}; + std::vector dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Mixing_U8_S16_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp new file mode 100644 index 0000000..42aab33 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp @@ -0,0 +1,142 @@ +/* + * 
Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Pack::Pack(std::vector inputs, Tensor *output, const PackParams &params) + : KernelWithParams(std::move(inputs), {output}, params) +{ +} + +void Pack::configure() +{ + LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast(params().values_count)); + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims()); + + if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 && + t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 && + t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64) + { + throw std::runtime_error("Unsupported type."); + } + + for (uint32_t i = 1; i < _inputs.size(); ++i) + { + const Tensor *tensor = _inputs[i]; + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); + for (int d = 0; d < t0->shape().num_dims(); ++d) + { + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); + 
} + } + + Shape output_shape(dimension_size); + int i = 0; + for (int index = 0; index < dimension_size; ++index) + { + if (index == axis) + { + output_shape.dim(index) = params().values_count; + } + else + { + output_shape.dim(index) = t0->shape().dim(i++); + } + } + + if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 || + t0->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale()); + // Guarantee input/output quantization params match as we do not support + // packing quantized tensors. + for (int i = 0; i < params().values_count; i++) + { + LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale()); + } + } + + output()->resize(output_shape); +} + +void Pack::execute() const +{ + switch (_inputs[0]->element_type()) + { + case DataType::FLOAT32: + evalGeneric(); + break; + case DataType::U8: + evalGeneric(); + break; + case DataType::S8: + evalGeneric(); + break; + case DataType::S16: + evalGeneric(); + break; + case DataType::S32: + evalGeneric(); + break; + case DataType::S64: + evalGeneric(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template void Pack::evalGeneric() const +{ + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + + VectorOfTensors inputs(_inputs); + tflite::PackParams params{}; + params.axis = axis; + params.inputs_count = _inputs.size(); + tflite::reference_ops::Pack(params, inputs.shapes(), inputs.data(), getTensorShape(output()), + getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h new file mode 100644 index 0000000..4a2fcfd 
--- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PACK_H +#define LUCI_INTERPRETER_KERNELS_PACK_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pack : public KernelWithParams +{ +public: + Pack(std::vector inputs, Tensor *output, const PackParams &params); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void evalGeneric() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PACK_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp new file mode 100644 index 0000000..d16320b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::vector> input_shapes, + std::initializer_list output_shape, std::vector> input_datas, + std::initializer_list output_data, int32_t axis) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + std::vector inputs(input_datas.size()); + std::vector tmp_inputs; + for (int i = 0; i < input_datas.size(); i++) + { + if (std::is_same::value || std::is_same::value || + std::is_same::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else if (std::is_same::value || std::is_same::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else + { + assert((std::is_same::value) && "unexpected dtype is tested"); + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + } + for (int i = 0; i < input_datas.size(); i++) + { + inputs[i] = &tmp_inputs[i]; + } + 
+ Tensor output_tensor = makeOutputTensor(element_type); + if (std::is_same::value || std::is_same::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128); + } + else if (std::is_same::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f, 0); + } + + PackParams params{}; + params.axis = axis; + params.values_count = input_datas.size(); + Pack kernel(inputs, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template class PackTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(PackTest, DataTypes); + +TYPED_TEST(PackTest, ThreeInputs) +{ + Check(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{3, 2}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 4, 2, 5, 3, 6}, /*axis=*/0); + + SUCCEED(); +} + +TYPED_TEST(PackTest, NegAxis) +{ + Check(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{2, 3}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 2, 3, 4, 5, 6}, /*axis=*/-1); + + SUCCEED(); +} + +TEST(Pack, MismatchingInputValuesCount_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input1_data{1, 4}; + std::vector input2_data{2, 5}; + std::vector input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 0; + params.values_count = 2; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + 
EXPECT_ANY_THROW(kernel.configure()); + } +} + +TEST(Pack, InvalidInputAxis_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input1_data{1, 4}; + std::vector input2_data{2, 5}; + std::vector input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 2; + params.values_count = 3; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); + } +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp new file mode 100644 index 0000000..c07f6e3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pad.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output) + : Kernel({input, paddings}, {output}) +{ +} + +void Pad::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +void Pad::execute() const +{ + const int num_dims = input()->shape().num_dims(); + + tflite::PadParams params{}; + params.left_padding_count = num_dims; + params.right_padding_count = num_dims; + + const auto *paddings_data = getTensorData(paddings()); + for (int i = num_dims - 1; i >= 0; --i) + { + params.left_padding[i] = paddings_data[i * 2]; + params.right_padding[i] = paddings_data[i * 2 + 1]; + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + const float pad_value = 0.0f; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData(input()), + &pad_value, getTensorShape(output()), + getTensorData(output())); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits::min()); + assert(output()->zero_point() <= 
std::numeric_limits::max()); + const auto pad_value = static_cast(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData(input()), + &pad_value, getTensorShape(output()), + getTensorData(output())); + break; + } + case DataType::S8: + { + assert(output()->zero_point() >= std::numeric_limits::min()); + assert(output()->zero_point() <= std::numeric_limits::max()); + const auto pad_value = static_cast(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData(input()), + &pad_value, getTensorShape(output()), + getTensorData(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h new file mode 100644 index 0000000..e05b47f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_PAD_H +#define LUCI_INTERPRETER_KERNELS_PAD_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pad : public Kernel +{ +public: + Pad(const Tensor *input, const Tensor *paddings, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PAD_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp new file mode 100644 index 0000000..dd3ce94 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(Pad, Uint8) +{ + std::unique_ptr memory_manager = std::make_unique(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair quant_param = quantizationParams(-1.0f, 1.0f); + std::vector input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; + std::vector paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; + Tensor input_tensor = makeInputTensor( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); +} + +TEST(Pad, Int8) +{ + std::unique_ptr memory_manager = std::make_unique(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair quant_param = quantizationParams(-1.0f, 1.0f); + std::vector input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2}; + std::vector paddings_data{0, 0, 1, 2, 2, 1, 0, 0}; + Tensor input_tensor = makeInputTensor( + {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, 
quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0, + 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1})); +} + +TEST(Pad, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{1, 2, 3, 4, 5, 6}; + std::vector paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; + Tensor input_tensor = + makeInputTensor({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::initializer_list ref_output_shape{2, 4, 6, 1}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp new file mode 100644 index 0000000..197cdaa --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, + Tensor *output) + : Kernel({input, paddings, constant_values}, {output}) +{ +} + +void PadV2::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + assert(constant_values()->element_type() == output()->element_type()); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + // Constant values elements number should be 1. 
+ assert(constant_values()->shape().num_elements() == 1); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +void PadV2::execute() const +{ + const int num_dims = input()->shape().num_dims(); + + tflite::PadParams params{}; + params.left_padding_count = num_dims; + params.right_padding_count = num_dims; + + const auto *paddings_data = getTensorData(paddings()); + for (int i = num_dims - 1; i >= 0; --i) + { + params.left_padding[i] = paddings_data[i * 2]; + params.right_padding[i] = paddings_data[i * 2 + 1]; + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + const auto pad_value = getTensorData(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData(input()), + &pad_value, getTensorShape(output()), + getTensorData(output())); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits::min()); + assert(output()->zero_point() <= std::numeric_limits::max()); + const auto pad_value = getTensorData(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData(input()), + &pad_value, getTensorShape(output()), + getTensorData(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h new file mode 100644 index 0000000..48a31f5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung 
Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H +#define LUCI_INTERPRETER_KERNELS_PAD_V2_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class PadV2 : public Kernel +{ +public: + PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + const Tensor *constant_values() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp new file mode 100644 index 0000000..41efaff --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PadV2, Uint8) +{ + std::unique_ptr memory_manager = std::make_unique(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair quant_param = quantizationParams(-1.0f, 1.0f); + std::vector input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; + std::vector paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; + std::vector constant_values_data{0.5}; + Tensor input_tensor = makeInputTensor( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = makeInputTensor( + {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data = { + 0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, // + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}; // + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); +} + +TEST(PadV2, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + std::vector input_data{1, 2, 3, 4, 5, 6}; + std::vector paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; + std::vector constant_values_data{7}; + Tensor input_tensor = + makeInputTensor({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = + makeInputTensor({1}, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + std::initializer_list ref_output_shape{2, 4, 6, 1}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp new file mode 100644 index 0000000..722c640 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pow.h" +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Pow::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Pow::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + eval(); + break; + case DataType::S32: + eval(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template void Pow::eval() const +{ + tflite::ArithmeticParams params{}; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h 
b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h new file mode 100644 index 0000000..8ff865e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_POW_H +#define LUCI_INTERPRETER_KERNELS_POW_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pow : public Kernel +{ +public: + Pow(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void eval() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_POW_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp new file mode 100644 index 0000000..0e85811 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pow.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class PowTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(PowTest, SimplePow) +{ + std::initializer_list base_shape = {1, 1, 3, 2}; + + std::vector input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f}; + + Tensor input1_tensor = + makeInputTensor(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(base_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, FloatBroadcastPow) +{ + std::initializer_list input1_shape = {1, 3}; + std::initializer_list input2_shape = {3, 1}; + + std::vector input1_data{0.3f, 2.3f, 0.9f}; + std::vector input2_data{0.2f, 0.3f, 0.4f}; + std::vector test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f, + 0.96888f, 
0.6178f, 1.3953f, 0.9587f}; + + Tensor input1_tensor = + makeInputTensor(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(PowTest, IntPow) +{ + std::initializer_list base_shape = {1, 3}; + + std::vector input_data{2, 3, 4}; + std::vector test_outputs{4, 27, 256}; + + Tensor input1_tensor = + makeInputTensor(base_shape, input_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(base_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(test_outputs)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {1.0f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Input_Type_Mismatch_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {4}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp new file mode 100644 index 0000000..0c8544a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Quantize.h" +#include "kernels/Utils.h" +#include "PALQuantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template void call_requantize(const Tensor *input, Tensor *output) +{ + int32_t multiplier; + int shift; + + const double effective_output_scale = input->scale() / output->scale(); + quantizeMultiplier(effective_output_scale, &multiplier, &shift); + + const auto input_shape = getTensorShape(input); + const auto output_shape = getTensorShape(output); + const auto size = tflite::MatchingFlatSize(input_shape, output_shape); + + const auto input_data = getTensorData(input); + + switch (output->element_type()) + { + case loco::DataType::S8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData(output)); + break; + case loco::DataType::U8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData(output)); + break; + case loco::DataType::S16: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData(output)); + break; + default: + throw std::runtime_error("Unsupported quantized type, yet!"); + } +} + +} // namespace + +Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Quantize::configure() +{ + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S8 || + output()->element_type() == loco::DataType::S16); + break; + } + case loco::DataType::S16: + case loco::DataType::S8: + case loco::DataType::U8: + { + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 || + 
output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S16); + if (output()->element_type() == loco::DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == 0); + } + break; + } + default: + throw std::runtime_error("Unsupported type"); + } + + output()->resize(input()->shape()); +} + +void Quantize::execute() const +{ + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + tflite::QuantizationParams op_params; + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + const auto input_data = getTensorData(input()); + + switch (output()->element_type()) + { + case loco::DataType::S8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), getTensorData(output())); + break; + } + case loco::DataType::U8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } + break; + } + case loco::DataType::S16: + { + call_requantize(input(), output()); + break; + } + case loco::DataType::S8: + { + call_requantize(input(), output()); + break; + } + case loco::DataType::U8: + { + call_requantize(input(), output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h new file mode 100644 index 0000000..006c536 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Quantize : public Kernel +{ +public: + Quantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp new file mode 100644 index 0000000..22e67fe --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Quantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class QuantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(QuantizeTest, FloatUint8) +{ + std::vector input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + Tensor input_tensor = + makeInputTensor({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt8) +{ + std::vector input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + Tensor input_tensor = + makeInputTensor({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt16) +{ + std::vector input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64}; + + std::vector ref_output_data{-12700, -12600, -600, -400, -200, + 200, 400, 600, 12700, 12800}; + + Tensor input_tensor = + makeInputTensor({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, Int16Int16) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; + + Tensor input_tensor = makeInputTensor( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int8Int8) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get()); + 
Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Uint8Uint8) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147}; + + Tensor input_tensor = makeInputTensor( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int16Int8) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, InvalidInputType_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + 
+ Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor({1, 1, 2, 5}, 0.5, -1, input_data, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp new file mode 100644 index 0000000..747ec6c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/Utils.h" + +#include "PALRelu.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu::evalFloat() const +{ + const auto input_data = getTensorData(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data); +} + +void Relu::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast(std::numeric_limits::min()), params.output_offset); + params.quantized_activation_max = static_cast(std::numeric_limits::max()); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +void Relu::evalQuantizedS16() const +{ + 
const auto *input_data = getTensorData(input()); + auto *output_data = getTensorData(output()); + + constexpr int32_t output_min = 0; + constexpr int32_t output_max = std::numeric_limits::max(); + + const int32_t num_elements = input()->shape().num_elements(); + + for (int32_t i = 0; i < num_elements; ++i) + { + const int32_t input_val = input_data[i]; + int32_t output_val = + tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift); + output_val = std::max(output_val, output_min); + output_val = std::min(output_val, output_max); + output_data[i] = static_cast(output_val); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h new file mode 100644 index 0000000..b813f0c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU_H +#define LUCI_INTERPRETER_KERNELS_RELU_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu : public Kernel +{ +public: + Relu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp new file mode 100644 index 0000000..bd32e3c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReluTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(ReluTest, FloatSimple) +{ + std::vector input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(ReluTest, Uint8Quantized) +{ + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 8; + const float f_max = (127.0 / 128.0) * 8; + + std::pair quant_param = quantizationParams(f_min, f_max); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, Uint8Requantized) +{ + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 8; + const float in_max = (127.0 / 128.0) * 8; + const float out_min = (0.0 / 256.0) * 8; + const float out_max = (255.0 / 256.0) * 8; + + std::pair quant_input = quantizationParams(in_min, in_max); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair quant_output = quantizationParams(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, SInt16) +{ + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + std::vector ref_output_data{ + 0, 0, 2, 4, // + 3, 0, 7, 1, // + }; + + Tensor input_tensor = + makeInputTensor({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(ReluTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ReluTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = 
makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp new file mode 100644 index 0000000..07205ed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu6.h" +#include "kernels/Utils.h" + +#include "PALRelu6.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu6::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + if (input()->element_type() == DataType::U8) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu6::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu6::evalFloat() const +{ + const auto input_data = getTensorData(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data); +} + +void Relu6::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast(std::numeric_limits::min()), params.output_offset); + params.quantized_activation_max = + std::min(static_cast(std::numeric_limits::max()), + params.output_offset + static_cast(roundf(6.f / output()->scale()))); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h 
new file mode 100644 index 0000000..f5030b5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H +#define LUCI_INTERPRETER_KERNELS_RELU6_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu6 : public Kernel +{ +public: + Relu6(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU6_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp new file mode 100644 index 0000000..af7b3f3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Relu6.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class Relu6Test : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(Relu6Test, FloatSimple) +{ + std::vector input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 7.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 6.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(Relu6Test, Uint8Quantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 10; + const float f_max = (127.0 / 128.0) * 10; + const float tolerance = (f_max - f_min) / 255.0; + + std::vector input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair quant_param = quantizationParams(f_min, f_max); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Uint8Requantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 10; + const float in_max = (127.0 / 128.0) * 10; + const float out_min = (0.0 / 256.0) * 0; + const float out_max = (255.0 / 256.0) * 6; + const float tolerance = (in_max - in_min) / 255.0; + + std::vector input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair quant_input = quantizationParams(in_min, in_max); + Tensor input_tensor = makeInputTensor( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair quant_output = quantizationParams(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu6 kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Relu6Test, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp new file mode 100644 index 0000000..61d3300 --- 
/dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Reshape.h" + +#include +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +static Shape extractShapeFromTensor(const Tensor *tensor) +{ + assert(tensor->element_type() == DataType::S32); + Shape shape(tensor->shape().num_elements()); + const auto *shape_data = tensor->data(); + for (int i = 0; i < tensor->shape().num_elements(); ++i) + { + shape.dim(i) = shape_data[i]; + } + return shape; +} + +static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape) +{ + const int32_t num_input_elements = input_shape.num_elements(); + int32_t num_output_elements = 1; + int unknown_dim_index = -1; + for (int i = 0; i < output_shape->num_dims(); ++i) + { + const int32_t value = output_shape->dim(i); + if (value == -1) + { + assert(unknown_dim_index == -1); + unknown_dim_index = i; + } + else + { + num_output_elements *= value; + } + } + if (unknown_dim_index != -1) + { + output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements; + num_output_elements *= output_shape->dim(unknown_dim_index); + } + assert(num_output_elements == num_input_elements); +} + +Reshape::Reshape(const Tensor *input, const Tensor 
*shape, Tensor *output) + : Kernel({input, shape}, {output}) +{ +} + +void Reshape::configure() +{ + Shape output_shape = extractShapeFromTensor(shape()); + resolveUnknownDimension(input()->shape(), &output_shape); + output()->resize(output_shape); +} + +void Reshape::execute() const +{ + const auto *input_data = input()->data(); + auto *output_data = output()->data(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h new file mode 100644 index 0000000..99b947f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H +#define LUCI_INTERPRETER_KERNELS_RESHAPE_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Reshape : public Kernel +{ +public: + Reshape(const Tensor *input, const Tensor *shape, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *shape() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp new file mode 100644 index 0000000..c2ff3ea --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Reshape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReshapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +// TODO Test types other than FLOAT32. 
+
+TEST_F(ReshapeTest, Regular)
+{
+  Shape input_shape{1, 2, 2, 3};
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Shape shape_shape{2};
+  std::vector<int32_t> shape_data{3, 4};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, UnknownDimension)
+{
+  Shape input_shape{2, 1, 2, 3};
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Shape shape_shape{3};
+  std::vector<int32_t> shape_data{2, -1, 2}; // -1 marks the dimension to be resolved
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 0000000..e2ddd6a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+                               const ResizeBilinearParams &params)
+  : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  if (params().half_pixel_centers && params().align_corners)
+    throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+  Shape output_shape(4); // dims 0 and 3 are copied from input, dims 1 and 2 are read from size
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+  output_shape.dim(3) = input()->shape().dim(3);
+  output()->resize(output_shape);
+}
+
+void ResizeBilinear::execute() const
+{
+  tflite::ResizeBilinearParams op_params{};
+  op_params.align_corners = params().align_corners;
+  op_params.half_pixel_centers = params().half_pixel_centers;
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::ResizeBilinear(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::ResizeBilinear(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 0000000..b7bdc2a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+  ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+                 const ResizeBilinearParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *size() const { return _inputs[1]; } // second constructor argument
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 0000000..933a112
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+           std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+           std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+           bool align_corners, bool half_pixel_centers)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeBilinearParams params{};
+  params.align_corners = align_corners;
+  params.half_pixel_centers = half_pixel_centers;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+                    std::initializer_list<int32_t> size_shape,
+                    std::initializer_list<int32_t> output_shape,
+                    std::initializer_list<float> input_data,
+                    std::initializer_list<int32_t> size_data,
+                    std::initializer_list<float> output_data, bool align_corners,
+                    bool half_pixel_centers)
+{
+  // The TFLite example uses the uint8 values themselves, which corresponds to a quantization
+  // scale of 1.0f and a zero point of 0.
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
+
+  ResizeBilinearParams params{};
+  params.align_corners = align_corners;
+  params.half_pixel_centers = half_pixel_centers;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeBilinearTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
+
+TYPED_TEST(ResizeBilinearTest, SimpleTest)
+{
+  Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+                   {
+                     3, 6,  //
+                     9, 12, //
+                     4, 10, //
+                     10, 16 //
+                   },
+                   {3, 3},
+                   {
+                     3, 5, 6,    //
+                     7, 9, 10,   //
+                     9, 11, 12,  //
+                     4, 8, 10,   //
+                     8, 12, 14,  //
+                     10, 14, 16, //
+                   },
+                   false, false);
+  SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterFloatTest)
+{
+  Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+               {
+                 1, 2, //
+                 3, 4, //
+                 1, 2, //
+                 3, 4  //
+               },
+               {3, 3},
+               {
+                 1, 1.5, 2, //
+                 2, 2.5, 3, //
+                 3, 3.5, 4, //
+                 1, 1.5, 2, //
+                 2, 2.5, 3, //
+                 3, 3.5, 4, //
+               },
+               false, true);
+  SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
+{
+  Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+                 {
+                   3, 6,  //
+                   9, 12, //
+                   4, 10, //
+                   12, 16 //
+                 },
+                 {3, 3},
+                 {
+                   2, 4, 6,    //
+                   6, 7, 9,    //
+                   9, 10, 12,  //
+                   4, 7, 10,   //
+                   8, 10, 13,  //
+                   12, 14, 16, //
+                 },
+                 false, true);
+  SUCCEED();
+}
+
+TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeBilinearParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeBilinearParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeBilinearParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, InvalidParams_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeBilinearParams params{};
+  params.align_corners = true;
+  params.half_pixel_centers = true;
+
+  ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 0000000..306cefb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+                                             Tensor *output,
+                                             const ResizeNearestNeighborParams &params)
+  : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+  Shape output_shape(4); // dims 0 and 3 are copied from input, dims 1 and 2 are read from size
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+  output_shape.dim(3) = input()->shape().dim(3);
+  output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+  tflite::ResizeNearestNeighborParams op_params{};
+  op_params.align_corners = params().align_corners;
+  op_params.half_pixel_centers = params().half_pixel_centers;
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 0000000..137d031
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+  ResizeNearestNeighbor(const Tensor *input, const Tensor *size, Tensor *output,
+                        const ResizeNearestNeighborParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *size() const { return _inputs[1]; } // second constructor argument
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 0000000..7ade02a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+           std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+           std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+           bool align_corners, bool half_pixel_centers)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = align_corners;
+  params.half_pixel_centers = half_pixel_centers;
+
+  ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+                    std::initializer_list<int32_t> size_shape,
+                    std::initializer_list<int32_t> output_shape,
+                    std::initializer_list<float> input_data,
+                    std::initializer_list<int32_t> size_data,
+                    std::initializer_list<float> output_data, bool align_corners,
+                    bool half_pixel_centers)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> quant_param =
+    quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+                                std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = align_corners;
+  params.half_pixel_centers = half_pixel_centers;
+
+  ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+  Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+                   {
+                     3, 6,  //
+                     9, 12, //
+                     4, 10, //
+                     10, 16 //
+                   },
+                   {3, 3},
+                   {
+                     3, 3, 6,    //
+                     3, 3, 6,    //
+                     9, 9, 12,   //
+                     4, 4, 10,   //
+                     4, 4, 10,   //
+                     10, 10, 16, //
+                   },
+                   false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+  Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+                   {
+                     3, 6,  //
+                     9, 12, //
+                     4, 10, //
+                     10, 16 //
+                   },
+                   {3, 3},
+                   {
+                     3, 6, 6,    //
+                     9, 12, 12,  //
+                     9, 12, 12,  //
+                     4, 10, 10,  //
+                     10, 16, 16, //
+                     10, 16, 16, //
+                   },
+                   true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+  Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+                   {
+                     3, 6,  //
+                     9, 12, //
+                     4, 10, //
+                     10, 16 //
+                   },
+                   {3, 3},
+                   {
+                     3, 6, 6,    //
+                     9, 12, 12,  //
+                     9, 12, 12,  //
+                     4, 10, 10,  //
+                     10, 16, 16, //
+                     10, 16, 16, //
+                   },
+                   false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = false;
+  params.half_pixel_centers = false;
+
+  ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 0000000..1b6a5cc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+  : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+  assert(axes()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+  if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+      input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+      input()->element_type() != DataType::S64)
+  {
+    throw std::runtime_error("Unsupported input type.");
+  }
+  if (axes()->element_type() != DataType::S32)
+  {
+    throw std::runtime_error("Unsupported axes type.");
+  }
+  if (axes()->shape().num_elements() > 1)
+  {
+    throw std::runtime_error("Current implementation does not support more than 1 axis.");
+  }
+  int axis_value = getTensorData<int32_t>(axes())[0];
+  if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+  {
+    throw std::runtime_error("Invalid axes value");
+  }
+  assert(input()->element_type() == output()->element_type());
+
+  output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+  int axis_value = getTensorData<int32_t>(axes())[0];
+  switch (output()->element_type()) // only FLOAT32 and U8 are implemented; other types throw
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+                                            getTensorData<float>(input()), getTensorShape(output()),
+                                            getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::Reverse<uint8_t>(
+        axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported output type");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
new file mode
100644
index 0000000..51211c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSEV2_H
+#define LUCI_INTERPRETER_KERNELS_REVERSEV2_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+  ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *axes() const { return _inputs[1]; } // second constructor argument
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSEV2_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 0000000..c0025fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  // TypeParam
+  std::vector<TypeParam> input_data{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+                                    13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+  Shape input_shape{4, 3, 2};
+  std::vector<int32_t> axis_data{1};
+  Shape axis_shape{1};
+
+  std::vector<TypeParam> output_data{5,  6,  3,  4,  1,  2,  11, 12, 9,  10, 7,  8,
+                                     17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+  std::vector<int32_t> output_shape{4, 3, 2};
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+              ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git
a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 0000000..6dd92dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape()); // output takes the same shape as input
+}
+
+void Rsqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Rsqrt::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = 1.f / std::sqrt(*i); // element-wise reciprocal square root
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index
0000000..adc5bcf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel // Kernel with one input tensor and one output tensor (reciprocal square root).
+{
+public:
+  Rsqrt(const Tensor *input, Tensor *output); // registers `input` as input 0 and `output` as output 0
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;     // type check and output resize; see Rsqrt.cpp
+  void execute() const override; // dispatches on the input element type; see Rsqrt.cpp
+
+private:
+  void evalFloat() const; // FLOAT32 implementation
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 0000000..3c64942
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list input_shape, std::initializer_list output_shape, // Runs Rsqrt on the given data and compares output values and shape.
+           std::initializer_list input_data, std::initializer_list output_data) // NOTE(review): the initializer_list element types look stripped by extraction - confirm.
+{
+  std::unique_ptr memory_manager = std::make_unique();
+
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor); // output memory is allocated only after configure()
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt) // Positive case: eight values, each expected to become 1 / sqrt(x).
+{
+  Check(
+    /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+    /*input_data=*/
+    {
+      5, 4, 8, 2,     //
+      6, 7.5, 9, 0.3, //
+    },
+    /*output_data=*/
+    {
+      0.44721360, 0.5, 0.35355339, 0.70710678,       //
+      0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+    });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG) // Negative case: the output tensor is S32, so configure() must throw.
+{
+  std::unique_ptr memory_manager = std::make_unique();
+
+  Tensor input_tensor = makeInputTensor({1}, {1.f}, memory_manager.get()); // NOTE(review): presumably a FLOAT32 input; the template argument is missing - confirm.
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG) // Negative case: configure() is expected to pass, execute() must throw.
+{
+  std::unique_ptr memory_manager = std::make_unique();
+
+  Tensor input_tensor = makeInputTensor({1}, {1}, memory_manager.get()); // NOTE(review): presumably an S64 input to match the output; the template argument is missing - confirm.
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 0000000..40d79aa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h> // tflite::QuantizeMultiplier
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation) // Maps a luci fused activation to the TfLite enum; throws for unlisted values.
+{
+  switch (activation)
+  {
+    case luci::FusedActFunc::RELU:
+      return kTfLiteActRelu;
+    case luci::FusedActFunc::RELU6:
+      return kTfLiteActRelu6;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case luci::FusedActFunc::TANH:
+      return kTfLiteActTanh;
+    case luci::FusedActFunc::SIGN_BIT:
+      return kTfLiteActSignBit;
+    case luci::FusedActFunc::NONE:
+      return kTfLiteActNone;
+    default:
+      throw std::runtime_error("Unsupported activation type");
+  }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+           const SVDFParams &params)
+  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+                                 params)
+{
+  // Do nothing
+}
+
+void SVDF::configure() // Validates element types and shapes, then sizes the output, the state copy and the scratchpads.
+{
+  const Shape &input_shape = input()->shape();
+  const Shape &weight_features_shape = weight_feature()->shape();
+  const Shape &weight_time_shape = weight_time()->shape();
+
+  // Validate Input Tensor:
+  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+                         input()->element_type() == loco::DataType::S8);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+  // Validate inputs and output types
+  if (input()->element_type() == loco::DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+                           weight_time()->element_type() == loco::DataType::S8);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+                           input_activation_state()->element_type() == loco::DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+    // Note: tflite currently supports only ReLU activation for integer SVDF
+    LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+  }
+  else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+  }
+  else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+            weight_feature()->element_type() == loco::DataType::S8) &&
+           input()->element_type() == loco::DataType::FLOAT32)
+  {
+    // TODO: support hybrid SVDF op
+    throw std::runtime_error("Hybrid type is not currently supported");
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  // Rank checks come first so that the dim() reads below stay within the weight shapes.
+  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+  // Check all the parameters of tensor match within themselves and match the
+  // input configuration.
+  const int rank = params().svdf_rank;
+  const int batch_size = input_shape.dim(0);
+  const int num_filters = weight_features_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(rank != 0);
+  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+  const int num_units = num_filters / rank;
+  const int memory_size = weight_time_shape.dim(1);
+
+  // Validate Weight_Feature Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+  // Validate Weight_Time Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+  // Validate Bias
+  if (bias())
+    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+  // Validate Input Activation State
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_state to input_activation_state
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+  // Resize output tensor
+  output()->resize({batch_size, num_units});
+
+  luci_interpreter_pal::SetupScratchpadTensor(
+    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const // Dispatches on the weight_feature type: FLOAT32 -> evalFloat, S8 with S8 input -> evalInteger.
+{
+  switch (weight_feature()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      evalFloat();
+      break;
+    case loco::DataType::S8:
+    {
+      if (input()->element_type() == loco::DataType::S8)
+        evalInteger();
+      else
+        // TODO: support hybrid SVDF op
+        throw std::runtime_error("Hybrid type is not currently supported");
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type");
+  }
+}
+
+void SVDF::evalInteger() const // Quantized path: derives two fixed-point rescale factors and calls the PAL IntegerSVDF.
+{
+  const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+                                                     input_activation_state()->scale());
+  const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+                                                     weight_time()->scale() / output()->scale());
+
+  int32_t effective_scale_1_a;
+  int effective_scale_1_b;
+  int32_t effective_scale_2_a;
+  int effective_scale_2_b;
+
+  tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+  tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: it is expected that activation_state input variable tensor reset to zero,
+  // also expected that this variable tensor doesn't have buffer
+  auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad = getOutputTensors()[2];
+  auto output_temp = getOutputTensors()[3];
+
+  int32_t input_zp = input()->zero_point();
+  int32_t output_zp = output()->zero_point();
+  luci_interpreter_pal::IntegerSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+    getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+    effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const // Float path: zeroes the state copy and calls the PAL FloatSVDF.
+{
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: it is expected that activation_state input variable tensor reset to zero,
+  // also expected that this variable tensor doesn't have buffer
+  auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad_1 = getOutputTensors()[2];
+
+  luci_interpreter_pal::FloatSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+    getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 0000000..335a6cd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams> // SVDF kernel: five inputs, one output plus seven scratch tensors.
+{
+public:
+  SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+       const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+       Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+       Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+       const SVDFParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *weight_feature() const { return _inputs[1]; }
+  const Tensor *weight_time() const { return _inputs[2]; }
+  const Tensor *bias() const { return _inputs[3]; } // callers test this for null before use (optional input)
+  const Tensor *input_activation_state() const { return _inputs[4]; }
+
+  Tensor *output() const { return _outputs[0]; } // scratch tensors occupy _outputs[1..7]
+
+  void configure() override;     // validates types and shapes, sizes output and scratch tensors
+  void execute() const override; // dispatches to evalFloat() or evalInteger()
+
+private:
+  void evalFloat() const;
+  void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 0000000..82bd9b0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test // Fixture that creates a fresh memory manager for every test.
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique(); } // NOTE(review): the make_unique/unique_ptr template arguments look stripped by extraction - confirm.
+
+  std::unique_ptr _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest) // Quantized path: quantized inputs, RELU activation, exact comparison of the quantized output.
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape bias_shape{units};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector input_data{0.49837467, 0.19278903, 0.26584083,
+                         0.17660543, 0.52949083, -0.77931279};
+
+  std::vector weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+                                  0.22197971, 0.12416199, 0.27901134, 0.27557442,
+                                  0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector weight_time_data{
+    -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+    0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+    0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+    -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+    -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+    0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+  std::vector bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+  std::pair input_quant_param = quantizationParams(-1, 1); // NOTE(review): the quantizationParams/std::pair template arguments are missing in this dump - confirm types upstream.
+  std::pair weight_feature_quant_param = quantizationParams(-0.5, 0.5);
+  std::pair weight_time_quant_param = quantizationParams(-1, 1);
+  std::pair bias_quant_param = quantizationParams(-512, 512);
+  std::pair activation_state_quant_param = quantizationParams(-16, 16);
+
+  std::pair output_quant_param = quantizationParams(-0.5, 0.5);
+
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_quant_param.first, input_quant_param.second,
+                    input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor(
+    weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+    weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor = makeInputTensor(
+    weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+    weight_time_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor(
+    bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(
+    DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::RELU;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor); // output and scratch memory is allocated only after configure()
+  _memory_manager->allocate_memory(scratchpad_activation_state);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  _memory_manager->allocate_memory(scratchpad_4);
+  _memory_manager->allocate_memory(scratchpad_5);
+  _memory_manager->allocate_memory(scratchpad_6);
+  kernel.execute();
+
+  std::vector ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+  std::vector ref_output_shape{batches, units};
+  EXPECT_THAT(extractTensorData(output_tensor), ref_output_data);
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest) // Float path: no bias (nullptr), no activation, outputs compared within 1e-5.
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector input_data{0.12609188, -0.46347019, -0.89598465,
+                         0.35867718, 0.36897406, 0.73463392};
+
+  std::vector weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+                                  0.22197971, 0.12416199, 0.27901134, 0.27557442,
+                                  0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector weight_time_data{
+    -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+    0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+    0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+    -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+    -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+    0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad_activation_state);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  _memory_manager->allocate_memory(scratchpad_4);
+  _memory_manager->allocate_memory(scratchpad_5);
+  _memory_manager->allocate_memory(scratchpad_6);
+  kernel.execute();
+
+  std::vector ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+                              -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+  std::vector ref_output_shape{batches, units};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG) // Negative case: configure() must throw. NOTE(review): presumably the input tensor uses an unsupported integer type; its template argument is missing here - confirm.
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector input_data{0, 1, 3, 4, 4, -2};
+
+  std::vector weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+                                  0.22197971, 0.12416199, 0.27901134, 0.27557442,
+                                  0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector weight_time_data{
+    -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+    0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+    0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+    -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+    -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+    0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG) // Negative case: input width 4 does not match weight_feature width 3, so configure() must throw.
+{
+  const int32_t batches = 2;
+  const int32_t right_input_size = 3;
+  const int32_t wrong_input_size = 4;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, wrong_input_size};
+  Shape weight_feature_shape{num_filters, right_input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+  std::vector weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+                                  0.22197971, 0.12416199, 0.27901134, 0.27557442,
+                                  0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector weight_time_data{
+    -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+    0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+    0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+    -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+    -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+    0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 0000000..0429fe1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+  : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure() // Requires an S32 or S64 output and resizes it to one dimension of length rank(input).
+{
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+                         output()->element_type() == DataType::S64);
+  const auto input_shape = input()->shape();
+
+  Shape output_shape(1);
+  output_shape.dim(0) = input_shape.num_dims();
+
+  output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const // Picks the integer width used to write the result.
+{
+  switch (params().out_type) // NOTE(review): dispatch uses params().out_type while configure() validated output()->element_type(); presumably they always agree - confirm.
+  {
+    case DataType::S32:
+      evalInt<int32_t>();
+      break;
+    case DataType::S64:
+      evalInt<int64_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+template <typename T> void ShapeKernel::evalInt() const // Copies each input dimension into the output buffer as a T.
+{
+  const auto input_shape = input()->shape();
+
+  auto output_data = getTensorData<T>(output());
+
+  for (int i = 0; i < input_shape.num_dims(); ++i)
+  {
+    output_data[i] = input_shape.dim(i);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 0000000..cfaadec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H +#define LUCI_INTERPRETER_KERNELS_SHAPE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ShapeKernel : public KernelWithParams +{ +public: + ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template void evalInt() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp new file mode 100644 index 0000000..4763e01 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ShapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +template void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager) +{ + Shape input_shape{1, 3, 1, 3, 5}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(dataType); + + ShapeParams params{}; + params.out_type = dataType; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{1, 3, 1, 3, 5}; + EXPECT_THAT(extractTensorData(output_tensor), ref_output_data); + + std::vector ref_output_shape{5}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ShapeTest, OutTypeInt) +{ + + // Run for int32_t output + runShapeKernel(loco::DataType::S32, _memory_manager.get()); + // Run for int64_t output + runShapeKernel(loco::DataType::S64, _memory_manager.get()); + + SUCCEED(); +} + +TEST_F(ShapeTest, Invalid_Output_Type_NEG) +{ + Shape input_shape{1, 3}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + ShapeParams params{}; + params.out_type = loco::DataType::FLOAT32; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp new file mode 100644 index 0000000..2fe2c54 --- 
/dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Slice.h" +#include "Utils.h" +#include "PALSlice.h" + +#include +#include + +namespace luci_interpreter +{ + +namespace kernels +{ +const int max_dim = 4; + +Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output) + : Kernel({input, begin, size}, {output}) +{ +} + +template +Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size) +{ + Shape output_shape = Shape(input->shape().num_dims()); + for (int idx = 0; idx < input->shape().num_dims(); idx++) + { + T size_value = getTensorData(size)[idx]; + if (size_value < 0) + { + if (size_value != -1) + { + throw std::runtime_error("Invalid size."); + } + size_value = input->shape().dim(idx) - getTensorData(begin)[idx]; + } + else + { + if (input->shape().dim(idx) < getTensorData(begin)[idx] + size_value) + { + throw std::runtime_error("Invalid begin and size."); + } + } + output_shape.dim(idx) = static_cast(size_value); + } + return output_shape; +} + +template +void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size, + std::vector *begins, std::vector *sizes) +{ + for (int idx = dimensions - 1; idx >= 0; --idx) + { + begins->push_back(getTensorData(begin)[idx]); + 
sizes->push_back(getTensorData(size)[idx]); + } +} + +void Slice::configure() +{ + assert(input()->element_type() == output()->element_type()); + assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64); + assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64); + assert(begin()->shape().num_dims() == 1); + assert(size()->shape().num_dims() == 1); + assert(input()->shape().num_dims() <= max_dim); + + if (begin()->element_type() == DataType::S32) + { + output()->resize(calculateOutputShape(input(), begin(), size())); + } + else if (begin()->element_type() == DataType::S64) + { + output()->resize(calculateOutputShape(input(), begin(), size())); + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +void Slice::execute() const +{ + std::vector begins; + begins.reserve(max_dim); + std::vector sizes; + sizes.reserve(max_dim); + if (begin()->element_type() == DataType::S32) + { + getBeginAndSizeVectors(input()->shape().num_dims(), begin(), size(), &begins, &sizes); + } + else if (begin()->element_type() == DataType::S64) + { + getBeginAndSizeVectors(input()->shape().num_dims(), begin(), size(), &begins, &sizes); + } + else + { + throw std::runtime_error("Unsupported begin type."); + } + for (int i = input()->shape().num_dims(); i < max_dim; ++i) + { + begins.push_back(0); + sizes.push_back(1); + } + + assert(begins.size() == 4); + assert(sizes.size() == 4); + tflite::SliceParams op_params{}; + op_params.begin_count = 4; + op_params.size_count = 4; + for (int i = 0; i < 4; i++) + { + op_params.begin[i] = begins[3 - i]; + op_params.size[i] = sizes[3 - i]; + } + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); + break; + case DataType::U8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData(input()), 
getTensorShape(output()), + getTensorData(output())); + break; + case DataType::S8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported input type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h new file mode 100644 index 0000000..23c3596 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H +#define LUCI_INTERPRETER_KERNELS_SLICE_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Slice : public Kernel +{ +public: + Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *begin() const { return _inputs[1]; } + const Tensor *size() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SLICE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp new file mode 100644 index 0000000..5179829 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Slice.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template class SliceTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SliceTest, DataTypes); + +TYPED_TEST(SliceTest, SimpleTest) +{ + std::unique_ptr memory_manager = std::make_unique(); + + std::vector input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; + Shape input_shape{3, 2, 3, 1}; + std::vector begin_data{1, 0, 0, 0}; + Shape begin_shape{4}; + std::vector size_data{2, 1, -1, 1}; + Shape size_shape{4}; + std::vector output_data{3, 3, 3, 5, 5, 5}; + std::vector output_shape{2, 1, 3, 1}; + + Tensor input_tensor = + makeInputTensor()>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor(begin_shape, begin_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor(size_shape, size_data, memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(getElementType()); + + Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp new file mode 100644 index 0000000..c230aaa --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Softmax.h" + +#include "kernels/Utils.h" + +#include +#include "PALSoftmax.h" + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params) + : KernelWithParams({input}, {output}, params) +{ +} + +void Softmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1); + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 || + output()->zero_point() == std::numeric_limits::min()); + tflite::SoftmaxParams op_params{}; + op_params.table = _table; + luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); + } + output()->resize(input()->shape()); +} + +void Softmax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S8: + evalQuantized(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Softmax::evalFloat() const +{ + tflite::SoftmaxParams op_params{}; + op_params.beta = params().beta; + + 
tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +template void Softmax::evalQuantized() const +{ + tflite::SoftmaxParams op_params{}; + op_params.table = const_cast(_table); + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta); + luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h new file mode 100644 index 0000000..1f281df --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H +#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Softmax : public KernelWithParams +{ +public: + Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalQuantized() const; + + float _table[256]; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp new file mode 100644 index 0000000..08e7067 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Softmax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template constexpr loco::DataType toLocoDataType(); + +template <> constexpr loco::DataType toLocoDataType() { return loco::DataType::FLOAT32; } + +template <> constexpr loco::DataType toLocoDataType() { return loco::DataType::U8; } + +template <> constexpr loco::DataType toLocoDataType() { return loco::DataType::S8; } + +template ::value, bool> = true> +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = + makeInputTensor()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(toLocoDataType()); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template ::value, bool> = true> +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + std::pair input_quant_param = + quantizationParams(std::min(std::min(input_data), 0.f), + std::max(std::max(input_data), 0.f)); + std::pair output_quant_param = + quantizationParams(std::min(std::min(output_data), 0.f), + std::max(std::max(output_data), 0.f)); + Tensor input_tensor = makeInputTensor()>(input_shape, input_quant_param.first, + input_quant_param.second, input_data, + memory_manager.get()); + Tensor output_tensor = + 
makeOutputTensor(toLocoDataType(), output_quant_param.first, output_quant_param.second); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template class SoftmaxTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SoftmaxTest, DataTypes); + +TYPED_TEST(SoftmaxTest, Simple) +{ + Check({2, 1, 2, 3}, {2, 1, 2, 3}, + { + 5, -9, 8, // + -7, 2, -4, // + 1, -2, 9, // + 3, -6, -1, // + }, + { + 0.38514, 0.09497, 0.51989, // + 0.20792, 0.51141, 0.28067, // + 0.25212, 0.18678, 0.56110, // + 0.48149, 0.19576, 0.32275, // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp new file mode 100644 index 0000000..630cd38 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/Utils.h" + +#include "PALSpaceToBatchND.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +const int kInputMinDimensionNum = 3; +const int kInputMaxDimensionNum = 4; + +} // namespace + +SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape, + const Tensor *paddings, Tensor *output) + : Kernel({input, block_shape, paddings}, {output}) +{ +} + +void SpaceToBatchND::configure() +{ + const auto *block_shape_data = block_shape()->data(); + const auto *paddings_data = paddings()->data(); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int spatial_dims_num = input()->shape().num_dims() - 2; + + LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num); + + LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2); + + Shape output_shape = Shape(input()->shape().num_dims()); + int output_batch_size = input()->shape().dim(0); + for (int i = 0; i < spatial_dims_num; ++i) + { + int final_dim_size = + (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]); + LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0); + output_shape.dim(i + 1) = final_dim_size / block_shape_data[i]; + output_batch_size = output_batch_size * block_shape_data[i]; + } + output_shape.dim(0) = output_batch_size; + output_shape.dim(input()->shape().num_dims() - 1) = + input()->shape().dim(input()->shape().num_dims() - 1); + 
output()->resize(output_shape); +} + +void SpaceToBatchND::execute() const +{ + switch (input()->element_type()) + { + tflite::SpaceToBatchParams op_params; + case DataType::FLOAT32: + op_params.output_offset = 0; + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(block_shape()), getTensorData(block_shape()), + getTensorShape(paddings()), getTensorData(paddings()), getTensorShape(output()), + getTensorData(output())); + break; + case DataType::U8: + op_params.output_offset = output()->zero_point(); + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData(input()), + getTensorShape(block_shape()), getTensorData(block_shape()), + getTensorShape(paddings()), getTensorData(paddings()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h new file mode 100644 index 0000000..0893003 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H +#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SpaceToBatchND : public Kernel +{ +public: + SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings, + Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *block_shape() const { return _inputs[1]; } + const Tensor *paddings() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp new file mode 100644 index 0000000..3a8b0a8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, + std::initializer_list block_shape_shape, + std::initializer_list paddings_shape, + std::initializer_list output_shape, std::initializer_list input_data, + std::initializer_list block_shape_data, + std::initializer_list paddings_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <> +void Check( + std::initializer_list input_shape, std::initializer_list block_shape_shape, + std::initializer_list paddings_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list block_shape_data, + std::initializer_list paddings_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + std::pair input_quant_param = + quantizationParams(std::min(input_data), std::max(input_data)); + Tensor input_tensor = + makeInputTensor(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); 
+ Tensor block_shape_tensor = + makeInputTensor(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template class SpaceToBatchNDTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes); + +TYPED_TEST(SpaceToBatchNDTest, Simple) +{ + Check(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2}, + /*paddings_shape=*/{2, 2}, + /*output_shape=*/{6, 2, 2, 1}, + /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0}, + /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0}, + /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7, + 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0}); +} + +TEST(SpaceToBatchNDTest, Invalid_Shape_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = makeInputTensor( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor({2}, {2, 2}, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor({2, 2}, {0, 0, 0, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp new file mode 100644 index 0000000..7c29e8c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SpaceToDepth.h" +#include "Utils.h" +#include "PALSpaceToDepth.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params) + : KernelWithParams({input}, {output}, params) +{ +} + +void SpaceToDepth::configure() +{ + assert(input()->shape().num_dims() == 4); + assert(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 || + output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64); + assert(input()->element_type() == output()->element_type()); + + const int block_size = params().block_size; + const int32_t input_height = input()->shape().dim(1); + const int32_t input_width = input()->shape().dim(2); + int32_t output_height = input_height / block_size; + int32_t output_width = input_width / block_size; + + assert(input_height == output_height * block_size); + assert(input_width == output_width * block_size); + + Shape output_shape(4); + 
output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = output_height; + output_shape.dim(2) = output_width; + output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size; + + output()->resize(output_shape); +} + +void SpaceToDepth::execute() const +{ + tflite::SpaceToDepthParams op_params{}; + op_params.block_size = params().block_size; + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + case DataType::U8: + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h new file mode 100644 index 0000000..e66316b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SpaceToDepth kernel: one input tensor (slot 0), one output tensor (slot 0),
+// parameterised by SpaceToDepthParams. configure() validates the operands and
+// sizes the output; execute() performs the rearrangement.
+class SpaceToDepth : public KernelWithParams
+{
+public:
+  SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 0000000..4af4886
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "kernels/SpaceToDepth.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template class SpaceToDepthTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes); + +TYPED_TEST(SpaceToDepthTest, SimpleCase) +{ + std::unique_ptr memory_manager = std::make_unique(); + + constexpr DataType element_type = getElementType(); + std::vector input_data{1, 5, 6, 7, 2, 3, 4, 8}; + Shape input_shape{1, 2, 2, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + std::vector output_data{1, 5, 6, 7, 2, 3, 4, 8}; + std::vector output_shape{1, 1, 1, 8}; + Tensor output_tensor = makeOutputTensor(element_type); + + SpaceToDepthParams params{}; + params.block_size = 2; + + SpaceToDepth kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp new file mode 100644 index 0000000..1a563f3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Split.h" + +#include "Utils.h" + +#include "PALSplit.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Split::Split(const Tensor *axis, const Tensor *input, std::vector outputs) + : Kernel({axis, input}, std::move(outputs)) +{ +} + +void Split::configure() +{ + assert(axis()->shape().num_elements() == 1); + _axis_value = getTensorData(axis())[0]; + if (_axis_value < 0) + _axis_value += input()->shape().num_dims(); + assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims()); + + const int32_t input_size = input()->shape().dim(_axis_value); + assert(input_size % _outputs.size() == 0); + const int32_t slice_size = input_size / _outputs.size(); + + Shape output_shape = input()->shape(); + output_shape.dim(_axis_value) = slice_size; + for (Tensor *output : _outputs) + { + output->resize(output_shape); + } +} + +void Split::execute() const +{ + tflite::SplitParams params{}; + params.num_split = _outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors all_outputs(_outputs); \ + luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Split.h b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h new file mode 100644 index 0000000..9542b1e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Split : public Kernel +{ +public: + Split(const Tensor *axis, const Tensor *input, std::vector outputs); + + const Tensor *axis() const { return _inputs[0]; } + const Tensor *input() const { return _inputs[1]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp new file mode 100644 index 0000000..283cd9a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Split.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(int axis, int num_splits, std::initializer_list input_shape, + std::initializer_list output_shape, std::initializer_list input_data, + std::vector> output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + constexpr DataType element_type = getElementType(); + Tensor axis_tensor = makeInputTensor({}, {axis}, memory_manager.get()); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + + std::vector output_tensors; + output_tensors.reserve(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensors.emplace_back(makeOutputTensor(element_type)); + } + + std::vector output_tensor_ptrs(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs)); + kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_splits; ++i) + { + EXPECT_THAT(extractTensorData(output_tensors[i]), + 
::testing::ElementsAreArray(output_data[i])); + } +} + +template class SplitTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SplitTest, DataTypes); + +TYPED_TEST(SplitTest, FourDimensional) +{ + Check(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, // + }); + Check( + /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 9, 10, 11, 12}, // + {5, 6, 7, 8, 13, 14, 15, 16}, // + }); + Check( + /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 5, 6, 9, 10, 13, 14}, // + {3, 4, 7, 8, 11, 12, 15, 16}, // + }); + Check( + /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 3, 5, 7, 9, 11, 13, 15}, // + {2, 4, 6, 8, 10, 12, 14, 16}, // + }); +} + +TYPED_TEST(SplitTest, OneDimensional) +{ + Check( + /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); +} + +TYPED_TEST(SplitTest, NegativeAxis) +{ + Check( + /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp new file mode 100644 index 0000000..aa68208 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers {input, size_splits, axis} as the kernel inputs (in that slot
+// order) and forwards the output list to the Kernel base.
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+               std::vector outputs)
+  : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+// Resolves the split axis and sizes every output tensor.
+//  - axis must hold exactly one element; a negative value is offset by the
+//    number of input dimensions.
+//  - size_splits must be one-dimensional with one entry per output.
+//  - At most one entry may be -1; that output receives whatever is left of
+//    the axis dimension after all explicit sizes are subtracted.
+// Every output copies the input shape with only the axis dimension replaced.
+// All validation is done with asserts.
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast(_outputs.size());
+  auto sizes_data = getTensorData(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+
+  int32_t sum = 0;
+  const auto num_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  // Visit every entry, the last one included: `sum` must contain all explicit
+  // sizes and `count_neg_dim` must see every -1, wherever it appears.
+  // Stopping one entry early gave a -1 in a non-last position too large a
+  // size and let a second -1 in the last position slip past the assert below.
+  for (int32_t i = 0; i < num_size_splits; ++i)
+  {
+    if (sizes_data[i] != -1)
+    {
+      sum += sizes_data[i];
+    }
+    else
+    {
+      count_neg_dim++;
+    }
+  }
+  assert(count_neg_dim < 2);
+  assert(size_splits()->shape().num_elements() == num_split);
+
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    if (sizes_data[i] == -1)
+    {
+      // Inferred size: the remainder of the axis dimension.
+      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+    }
+    else
+    {
+      output_shape.dim(_axis_value) = sizes_data[i];
+    }
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{}; + params.num_split = _outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors all_outputs(_outputs); \ + tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + case DataType::S16: + TF_LITE_SPLIT(int16_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h new file mode 100644 index 0000000..92f6288 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SplitV : public Kernel +{ +public: + SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector outputs); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size_splits() const { return _inputs[1]; } + const Tensor *axis() const { return _inputs[2]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp new file mode 100644 index 0000000..035bc21 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test driver: builds the input, size_splits and axis tensors, creates one
+// output tensor per entry of `splits_size`, runs SplitV (configure, allocate
+// the outputs, execute) and expects output i to hold exactly `output_data[i]`.
+template
+void Check(int axis, std::initializer_list splits_size,
+           std::initializer_list input_shape, std::initializer_list input_data,
+           std::vector> output_data)
+{
+  std::unique_ptr memory_manager = std::make_unique();
+  constexpr DataType element_type = getElementType();
+
+  auto num_splits = static_cast(splits_size.size());
+  Tensor input_tensor =
+    makeInputTensor(input_shape, input_data, memory_manager.get());
+  Tensor sizes_tensor =
+    makeInputTensor({num_splits}, splits_size, memory_manager.get());
+  Tensor axis_tensor = makeInputTensor({}, {axis}, memory_manager.get());
+
+  // Reserve up front so the element addresses taken below stay valid.
+  std::vector output_tensors;
+  output_tensors.reserve(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensors.emplace_back(makeOutputTensor(element_type));
+  }
+
+  std::vector output_tensor_ptrs(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensor_ptrs[i] = &output_tensors[i];
+  }
+
+  SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+  kernel.configure();
+  // Outputs are allocated only after configure() has resized them.
+  for (int i = 0; i < num_splits; ++i)
+  {
+    memory_manager->allocate_memory(output_tensors[i]);
+  }
+  kernel.execute();
+
+  for (int i = 0; i < num_splits; ++i)
+  {
+    EXPECT_THAT(extractTensorData(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+  }
+}
+
+template class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types;
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+  Check(
+    /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+    {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+    {
+      {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+      {10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27} // + }); + Check( + /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 10, 11, 12, 19, 20, 21}, // + {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} // + }); + Check( + /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 4, 7, 10, 13, 16, 19, 22, 25}, // + {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp new file mode 100644 index 0000000..46e9fc9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sqrt.h" +#include "kernels/Utils.h" + +#include +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Sqrt::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Sqrt::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sqrt::evalFloat() const +{ + auto in = getTensorData(input()); + auto out = getTensorData(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = std::sqrt(*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h new file mode 100644 index 0000000..4034655 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H +#define LUCI_INTERPRETER_KERNELS_SQRT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sqrt : public Kernel +{ +public: + Sqrt(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQRT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp new file mode 100644 index 0000000..96835fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sqrt.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Sqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(SqrtTest, SimpleSqrt) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, 8, 2, 4, // + 3, 7, 10, 0.3, // + }, + /*output_data=*/ + { + 0.0, 2.8284271, 1.4142136, 2, // + 1.7320508, 2.6457513, 3.1622777, 0.54772256, // + }); +} + +TEST(SqrtTest, Input_Output_Type_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = makeInputTensor({1}, {1.f}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Sqrt kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(SqrtTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Tensor input_tensor = makeInputTensor({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Sqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp new file mode 100644 index 0000000..bc71905 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Square.h" +#include "kernels/Utils.h" + +#include +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Square::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Square::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Square::evalFloat() const +{ + auto in = getTensorData(input()); + auto out = getTensorData(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = (*i) * (*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.h b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h new file mode 100644 
index 0000000..73ed5a7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H +#define LUCI_INTERPRETER_KERNELS_SQUARE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Square : public Kernel +{ +public: + Square(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp new file mode 100644 index 0000000..51662de --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Square.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquareTest, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Shape input_shape{3, 1, 2}; + std::vector input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data1, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Square kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp new file mode 100644 index 0000000..3bafeba --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void SquaredDifference::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void SquaredDifference::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalSquaredDifference(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template inline void SquaredDifference::evalSquaredDifference() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output()), [](T x, T y) { + const T difference = x - y; + return difference * difference; + }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h new file mode 100644 index 0000000..9327caf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H +#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SquaredDifference : public Kernel +{ +public: + SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template inline void evalSquaredDifference() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp new file mode 100644 index 0000000..2819c01 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquaredDifferenceTest, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Shape input_shape{3, 1, 2}; + std::vector input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor(input_shape, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor(input_shape, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(SquaredDifferenceTest, FloatBroadcast) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Shape input_shape1{3, 1, 2}; + Shape input_shape2{1}; + std::vector input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector input_data2{1.0}; + Tensor input_tensor1 = + makeInputTensor(input_shape1, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor(input_shape2, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference 
kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp new file mode 100644 index 0000000..4a75518 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Squeeze.h" + +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms) + : KernelWithParams({input}, {output}, params) +{ +} + +void Squeeze::configure() +{ + int input_num_dims = input()->shape().num_dims(); + int num_squeeze_dims = params().squeeze_dims.size(); + assert(input_num_dims <= 8); + bool should_squeeze[8] = {false}; + int num_squeezed_dims = 0; + if (num_squeeze_dims == 0) + { + for (int idx = 0; idx < input_num_dims; ++idx) + { + if (input()->shape().dim(idx) == 1) + { + should_squeeze[idx] = true; + ++num_squeezed_dims; + } + } + } + else + { + for (int idx = 0; idx < num_squeeze_dims; ++idx) + { + int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims + : params().squeeze_dims[idx]; + assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1); + if (!should_squeeze[current]) + ++num_squeezed_dims; + should_squeeze[current] = true; + } + } + Shape output_shape(input_num_dims - num_squeezed_dims); + for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx) + { + if (!should_squeeze[in_idx]) + { + output_shape.dim(out_idx++) = input()->shape().dim(in_idx); + } + } + output()->resize(output_shape); +} + +void Squeeze::execute() const +{ + assert(input()->shape().num_elements() == output()->shape().num_elements()); + + const auto *input_data = input()->data(); + auto *output_data = output()->data(); + std::memcpy(output_data, input_data, + getDataTypeSize(input()->element_type()) * input()->shape().num_elements()); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h new file mode 100644 index 0000000..687af51 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h @@ -0,0 
+1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H +#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Squeeze : public KernelWithParams +{ +public: + Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp new file mode 100644 index 0000000..1bc0b64 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Squeeze.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list output_shape, + std::initializer_list input_data, std::initializer_list output_data, + std::initializer_list squeeze_dims) +{ + std::unique_ptr memory_manager = std::make_unique(); + + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + SqueezeParams params{}; + params.squeeze_dims = squeeze_dims; + + Squeeze kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template class SqueezeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(SqueezeTest, DataTypes); + +TYPED_TEST(SqueezeTest, TotalTest) +{ + Check( + /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + {-1, 0}); +} + +} // 
namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp new file mode 100644 index 0000000..a8730d8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/StridedSlice.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, + const Tensor *strides, Tensor *output, const StridedSliceParams ¶ms) + : KernelWithParams({input, begin, end, strides}, {output}, params) +{ +} + +void StridedSlice::configure() +{ + assert(begin()->shape().num_dims() == 1); + assert(end()->shape().num_dims() == 1); + assert(strides()->shape().num_dims() == 1); + assert(input()->element_type() == output()->element_type()); + assert(begin()->element_type() == DataType::S32); + assert(end()->element_type() == DataType::S32); + assert(strides()->element_type() == DataType::S32); + assert(input()->shape().num_dims() <= 4); + if (params().ellipsis_mask != 0) + { + throw std::runtime_error("ellipsis_mask is not implemented yet."); + } + if (params().new_axis_mask != 0) + { + throw std::runtime_error("new_axis_mask is not implemented yet."); + } + if (input()->element_type() == DataType::U8) + { + assert(input()->scale() == output()->scale()); + assert(input()->zero_point() == output()->zero_point()); + } + tflite::StridedSliceParams op_params{}; + op_params.start_indices_count = input()->shape().num_dims(); + op_params.stop_indices_count = input()->shape().num_dims(); + op_params.strides_count = input()->shape().num_dims(); + + for (int i = 0; i < input()->shape().num_dims(); i++) + { + op_params.start_indices[i] = getTensorData(begin())[i]; + op_params.stop_indices[i] = getTensorData(end())[i]; + op_params.strides[i] = getTensorData(strides())[i]; + } + op_params.begin_mask = params().begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = params().end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = params().shrink_axis_mask; + std::vector output_shape_vector; + for (int i = 0; i < input()->shape().num_dims(); i++) + { + int idx = 
input()->shape().num_dims() - i - 1; + int32_t stride = getTensorData(strides())[idx]; + assert(stride != 0); + int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx); + int32_t end = + ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin); + + const bool shrink_axis = params().shrink_axis_mask & (1 << idx); + if (shrink_axis) + { + end = begin + 1; + } + + int32_t dim_shape = std::ceil((end - begin) / static_cast(stride)); + dim_shape = dim_shape < 0 ? 0 : dim_shape; + if (!shrink_axis) + { + output_shape_vector.push_back(dim_shape); + } + } + Shape output_shape = Shape(output_shape_vector.size()); + for (size_t i = 0; i < output_shape_vector.size(); i++) + { + output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1]; + } + output()->resize(output_shape); +} + +void StridedSlice::execute() const +{ + tflite::StridedSliceParams op_params{}; + op_params.start_indices_count = input()->shape().num_dims(); + op_params.stop_indices_count = input()->shape().num_dims(); + op_params.strides_count = input()->shape().num_dims(); + + for (int i = 0; i < input()->shape().num_dims(); i++) + { + op_params.start_indices[i] = getTensorData(begin())[i]; + op_params.stop_indices[i] = getTensorData(end())[i]; + op_params.strides[i] = getTensorData(strides())[i]; + } + op_params.begin_mask = params().begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = params().end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = params().shrink_axis_mask; + + switch (input()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + case DataType::U8: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + case 
DataType::S32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h new file mode 100644 index 0000000..fc96893 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H +#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class StridedSlice : public KernelWithParams +{ +public: + StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides, + Tensor *output, const StridedSliceParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *begin() const { return _inputs[1]; } + const Tensor *end() const { return _inputs[2]; } + const Tensor *strides() const { return _inputs[3]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp new file mode 100644 index 0000000..399cdeb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/StridedSlice.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(StridedSliceTest, Float) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Shape input_shape{2, 3, 2}; + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape begin_shape{3}; + std::vector begin_data{0, 0, 0}; + Shape end_shape{3}; + std::vector end_data{1, 3, 2}; + Shape strides_shape{3}; + std::vector strides_data{1, 1, 1}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor(strides_shape, strides_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.ellipsis_mask = 0; + params.new_axis_mask = 0; + params.shrink_axis_mask = 1; + + StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector output_shape{3, 2}; + std::vector output_data{1, 2, 3, 4, 5, 6}; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(StridedSliceTest, Uint8) +{ + std::unique_ptr memory_manager = std::make_unique(); + + Shape input_shape{2, 3, 2}; + std::vector input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape begin_shape{3}; + std::vector begin_data{0, 0, 0}; + Shape end_shape{3}; + std::vector end_data{1, 3, 2}; + Shape strides_shape{3}; + std::vector strides_data{1, 1, 1}; + 
Tensor input_tensor = + makeInputTensor(input_shape, 1.0f, 0, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor(strides_shape, strides_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0); + + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.ellipsis_mask = 0; + params.new_axis_mask = 0; + params.shrink_axis_mask = 1; + + StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector output_shape{3, 2}; + std::vector output_data{1, 2, 3, 4, 5, 6}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp new file mode 100644 index 0000000..24b6a72 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/Utils.h" + +#include "PALSub.h" + +#include + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms) + : KernelWithParams({input1, input2}, {output}, params) +{ +} + +void Sub::configure() +{ + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type())) + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type())) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Sub::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger(); + break; + case DataType::S32: + evalInteger(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sub::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), 
getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +template void Sub::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +void Sub::evalQuantized() const +{ + const auto input1_scale = static_cast(input1()->scale()); + const auto input2_scale = static_cast(input2()->scale()); + const auto output_scale = static_cast(output()->scale()); + + const int left_shift = 20; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + 
tflite::ArithmeticParams params{}; + params.left_shift = left_shift; + // The kernel expects inputs' zero points to be negated. + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input1_multiplier = input1_multiplier; + params.input1_shift = input1_shift; + params.input2_offset = -input2()->zero_point(); // Note the '-'. + params.input2_multiplier = input2_multiplier; + params.input2_shift = input2_shift; + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), + getTensorData(input2()), getTensorShape(output()), getTensorData(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData(input1()), + getTensorShape(input2()), getTensorData(input2()), + getTensorShape(output()), getTensorData(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h new file mode 100644 index 0000000..23952b3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SUB_H +#define LUCI_INTERPRETER_KERNELS_SUB_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sub : public KernelWithParams +{ +public: + Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp new file mode 100644 index 0000000..9abafd4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; +using std::pair; +using std::vector; +using std::transform; +using std::initializer_list; + +class SubTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST_F(SubTest, Uint8) +{ + Shape base_shape = {2, 3, 1, 2}; + vector base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + vector> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector> output_data = { + {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f, + 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f}, + {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f}, + {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + -2.1f, -0.5f, -2.6f, 
-1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f}, + {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}}; + + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + pair quant_param = quantizationParams(-3.f, 3.f); + for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + + // Inversion step for output_data, because subtract is not commutative operation + auto multiply = [](auto &i) { + transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; }); + }; + for_each(output_data.begin(), output_data.end(), multiply); + + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST_F(SubTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector> test_outputs = { + {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f, + 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}}; + + vector input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor(base_shape, input1_data, 
_memory_manager.get()); + Tensor input2_tensor = + makeInputTensor(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +template void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector> test_outputs = { + {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0, + 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0}, + {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0}, + {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0, + 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0}, + {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}}; + std::vector input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(SubTest, SInt32) +{ + 
CheckInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, SInt64) +{ + CheckInteger(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(SubTest, Mismatching_Input_Int_Types_NEG) +{ + Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels 
+} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp new file mode 100644 index 0000000..c4fa169 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Tanh.h" + +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Tanh::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + populateLookupTable(); + } + output()->resize(input()->shape()); +} + +void Tanh::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Tanh::evalFloat() const +{ + tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData(input()), + getTensorShape(output()), getTensorData(output())); +} + +void Tanh::evalQuantized() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + uint8_t *output_data = getTensorData(output()); + const uint8_t 
*input_data = getTensorData(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = getTableValue(input_data[i]); + } +} + +void Tanh::populateLookupTable() +{ + const auto input_scale = static_cast(input()->scale()); + const auto input_zero_point = static_cast(input()->zero_point()); + const auto output_scale = static_cast(output()->scale()); + const auto output_zero_point = static_cast(output()->zero_point()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits::max(); + int32_t minval = std::numeric_limits::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + const float transformed = std::tanh(dequantized); + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast(rescaled + output_zero_point); + setTableValue(static_cast(std::max(std::min(maxval, quantized), minval)), + static_cast(val)); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h new file mode 100644 index 0000000..8017c96 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_TANH_H +#define LUCI_INTERPRETER_KERNELS_TANH_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Tanh : public Kernel +{ +public: + Tanh(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void populateLookupTable(); + void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }; + uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }; + +private: + uint8_t _table[256]{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TANH_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp new file mode 100644 index 0000000..bfae479 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Tanh.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class TanhTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; +}; + +TEST_F(TanhTest, Float) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + 0, -0.9999877, 0.9640275, 0.999329, // + 0.99505475, -0.9640275, 1, 0.7615941, // + }; + EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(TanhTest, Uint8) +{ + float kMin = -1; + float kMax = 127.f / 128.f; + float kTanhTolerance = 2 * (1. 
/ 256); + std::pair input_quant_param = quantizationParams(8 * kMin, 8 * kMax); + std::pair output_quant_param = quantizationParams(kMin, kMax); + std::vector input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector ref_output_data{ + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + }; + std::vector ref_output_shape{2, 6, 4, 1}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(TanhTest, InputTypeInvalid_NEG) +{ + std::vector input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor 
output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(TanhTest, InputOutputMismatch_NEG) +{ + std::vector input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Tanh kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp new file mode 100644 index 0000000..4d983ad --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TestUtils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ +namespace testing +{ + +using ::testing::FloatNear; +using ::testing::Matcher; + +Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); } + +Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point) +{ + return Tensor(element_type, {}, {{scale}, {zero_point}}, ""); +} + +std::vector dequantizeTensorData(const Tensor &tensor) +{ + if (tensor.element_type() == DataType::U8) + { + std::vector data = extractTensorData(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + if (tensor.element_type() == DataType::S8) + { + std::vector data = extractTensorData(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + else if (tensor.element_type() == DataType::S16) + { + // S16 quantization is symmetric, so zero point should be zero. + for (auto zp : tensor.zero_points()) + { + (void)zp; + assert(zp == 0); + } + + std::vector data = extractTensorData(tensor); + if (tensor.scales().size() == 1) + { + return dequantize(data.data(), data.size(), tensor.scale(), 0); + } + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + const Shape shape = tensor.shape(); + const int32_t quantized_dimension = tensor.quantized_dimension(); + assert(quantized_dimension < shape.num_dims()); + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == tensor.scales().size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * 
inner_dims_size); + + std::vector dequantized_data; + dequantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + float scale = tensor.scales()[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector part_dequantized_data = + dequantize(data.data() + offset, inner_dims_size, scale, 0); + dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(), + part_dequantized_data.end()); + } + return dequantized_data; + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +Matcher> FloatArrayNear(const std::vector &values, float max_abs_error) +{ + std::vector> matchers; + matchers.reserve(values.size()); + for (const float v : values) + { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + return ElementsAreArray(matchers); +} + +std::vector extractTensorShape(const Tensor &tensor) +{ + std::vector result; + int dims = tensor.shape().num_dims(); + for (int i = 0; i < dims; i++) + { + result.push_back(tensor.shape().dim(i)); + } + return result; +} + +} // namespace testing +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h new file mode 100644 index 0000000..1f5a0c3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H +#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H + +#include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" + +#include + +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ +namespace testing +{ + +template +std::vector quantize(const float *data, size_t num_elements, float scale, int32_t zero_point); + +template +Tensor makeInputTensor(const Shape &shape, const std::vector::Type> &data, + IMemoryManager *memory_manager) +{ + Tensor tensor(DT, shape, {}, ""); + memory_manager->allocate_memory(tensor); + tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl
::Type)); + return tensor; +} + +/** + * @brief Create layer-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired tensor shape + * @param scale scale of quantized number + * @param zero_point zero point of quantized number, should be 0 for signed datatypes + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template +Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, + const std::vector &data, IMemoryManager *memory_manager) +{ + using NativeT = typename DataTypeImpl
::Type; + Tensor tensor(DT, shape, {{scale}, {zero_point}}, ""); + std::vector quantized_data = + quantize(data.data(), data.size(), scale, zero_point); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + +/** + * @brief Create channel-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired tensor shape + * @param scales scales of quantized number + * @param zero_points zero points of quantized number, should be 0 for signed datatypes + * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template +Tensor makeInputTensor(const Shape &shape, const std::vector &scales, + const std::vector &zero_points, int quantized_dimension, + const std::vector &data, IMemoryManager *memory_manager) +{ + using NativeT = typename DataTypeImpl
::Type; + assert(quantized_dimension < shape.num_dims()); + Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, ""); + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == scales.size()); + assert(quant_dim_size == zero_points.size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size); + + std::vector quantized_data; + quantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + int32_t zero_point = zero_points[channel]; + float scale = scales[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector part_quantized_data = + quantize(data.data() + offset, inner_dims_size, scale, zero_point); + quantized_data.insert(quantized_data.end(), part_quantized_data.begin(), + part_quantized_data.end()); + } + assert(quantized_data.size() == shape.num_elements()); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + +Tensor makeOutputTensor(DataType element_type); +Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point); + +std::vector extractTensorShape(const Tensor &tensor); + +// Returns the corresponding DataType given the type T. 
+template constexpr DataType getElementType() +{ + if (std::is_same::value) + return DataType::FLOAT32; + if (std::is_same::value) + return DataType::FLOAT64; + if (std::is_same::value) + return DataType::U8; + if (std::is_same::value) + return DataType::U16; + if (std::is_same::value) + return DataType::U32; + if (std::is_same::value) + return DataType::U64; + if (std::is_same::value) + return DataType::S8; + if (std::is_same::value) + return DataType::S16; + if (std::is_same::value) + return DataType::S32; + if (std::is_same::value) + return DataType::S64; + if (std::is_same::value) + return DataType::BOOL; + return DataType::Unknown; +} + +template std::vector extractTensorData(const Tensor &tensor) +{ + const auto *data_ptr = tensor.data(); + return std::vector(data_ptr, data_ptr + tensor.shape().num_elements()); +} + +std::vector dequantizeTensorData(const Tensor &tensor); + +// Array version of `::testing::FloatNear` matcher. +::testing::Matcher> FloatArrayNear(const std::vector &values, + float max_abs_error = 1.0e-5f); + +template +std::vector quantize(const float *data, size_t num_elements, float scale, int32_t zero_point) +{ + static_assert(std::is_integral::value, "Integral type expected."); + + float q_min{}, q_max{}; + if (std::is_signed::value) + { + q_min = -std::numeric_limits::max(); + q_max = std::numeric_limits::max(); + } + else + { + q_min = 0; + q_max = std::numeric_limits::max(); + } + + std::vector q; + for (size_t i = 0; i < num_elements; ++i) + { + const auto &f = data[i]; + q.push_back(static_cast( + std::max(q_min, std::min(q_max, std::round(zero_point + (f / scale)))))); + } + return q; +} + +template +std::vector dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point) +{ + static_assert(std::is_integral::value, "Integral type expected."); + std::vector f; + for (size_t i = 0; i < num_elements; ++i) + { + const T &q = data[i]; + f.push_back(scale * (q - zero_point)); + } + return f; +} + +// NOTE Returns scale 
/**
 * @brief Compute scale and zero point that map [f_min, f_max] onto the range of T
 * @tparam T integral storage type (signed or unsigned); the range is asymmetric
 * @param f_min lower bound of the real range, must be <= 0
 * @param f_max upper bound of the real range, must be >= 0
 * @return {scale, zero_point}; {0, 0} when the range is the single point {0}
 */
template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
{
  static_assert(std::is_integral<T>::value, "Integral type expected.");
  const T qmin = std::numeric_limits<T>::lowest();
  const T qmax = std::numeric_limits<T>::max();
  const float qmin_f = qmin;
  const float qmax_f = qmax;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  assert(f_max >= 0);
  assert(f_min <= 0);
  if (f_min == f_max)
  {
    // Special case where the min,max range is a point. Should be {0}.
    assert(f_max == 0);
    assert(f_min == 0);
    return {0.0f, 0};
  }

  // General case.
  //
  // First determine the scale.
  const float scale = (f_max - f_min) / (qmax_f - qmin_f);

  // Zero-point computation.
  // First the initial floating-point computation. The zero-point can be
  // determined from solving an affine equation for any known pair
  // (real value, corresponding quantized value).
  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair
  // will be roughly machine_epsilon * (sum of absolute values of terms)
  // so we want to use the variant that adds the smaller terms.
  const float zero_point_from_min = qmin_f - f_min / scale;
  const float zero_point_from_max = qmax_f - f_max / scale;

  const float zero_point_from_min_error = std::abs(qmin_f) + std::abs(f_min / scale);
  const float zero_point_from_max_error = std::abs(qmax_f) + std::abs(f_max / scale);

  const float zero_point_f = zero_point_from_min_error < zero_point_from_max_error
                               ? zero_point_from_min
                               : zero_point_from_max;

  // Now we need to nudge the zero point to be an integer
  // (our zero points are integer, and this is motivated by the requirement
  // to be able to represent the real value "0" exactly as a quantized value,
  // which is required in multiple places, for example in Im2col with SAME
  // padding).
  T nudged_zero_point = 0;
  if (zero_point_f < qmin_f)
  {
    nudged_zero_point = qmin;
  }
  else if (zero_point_f > qmax_f)
  {
    nudged_zero_point = qmax;
  }
  else
  {
    nudged_zero_point = static_cast<T>(std::round(zero_point_f));
  }

  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  assert(qmax >= nudged_zero_point);
  assert(qmin <= nudged_zero_point);
  const int32_t zero_point = nudged_zero_point;
  // finally, return the values
  return {scale, zero_point};
}

/// Width of one quantization step when [min, max] is split into
/// `quantize_steps` equal steps.
inline float getTolerance(float min, float max, int quantize_steps)
{
  return ((max - min) / quantize_steps);
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Transpose.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output) + : Kernel({input, perm}, {output}) +{ +} + +void Transpose::configure() +{ + // Transpose op only supports 1D-4D input arrays. + int dims = input()->shape().num_dims(); + const int32_t *perm_data = getTensorData(perm()); + + assert(input()->shape().num_dims() <= 4); + assert(input()->element_type() == output()->element_type()); + + assert(perm()->shape().num_dims() == 1); + assert(perm()->shape().dim(0) == dims); + + Shape output_shape(dims); + for (int i = 0; i < dims; i++) + { + assert(perm_data[i] < dims && perm_data[i] >= 0); + output_shape.dim(i) = input()->shape().dim(perm_data[i]); + } + + output()->resize(output_shape); +} + +void Transpose::execute() const +{ + tflite::TransposeParams params{}; + const int32_t *perm_data = getTensorData(perm()); + const int32_t size = perm()->shape().dim(0); + params.perm_count = size; + for (int i = 0; i < size; i++) + params.perm[i] = perm_data[i]; + switch (input()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + case DataType::U8: + tflite::reference_ops::Transpose(params, getTensorShape(input()), + getTensorData(input()), getTensorShape(output()), + getTensorData(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h new file mode 100644 index 0000000..d6f89c3 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H +#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Transpose : public Kernel +{ +public: + Transpose(const Tensor *input, const Tensor *perm, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *perm() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp new file mode 100644 index 0000000..43be8f8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Transpose.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list input_shape, std::initializer_list perm_shape, + std::initializer_list output_shape, std::initializer_list input_data, + std::initializer_list perm_data, std::initializer_list output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + Tensor perm_tensor = makeInputTensor(perm_shape, perm_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Transpose kernel(&input_tensor, &perm_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template class TransposeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(TransposeTest, DataTypes); + +TYPED_TEST(TransposeTest, Small3D) +{ + Check(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + /*perm_data=*/{2, 0, 1}, + /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, + 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}); +} + 
+TYPED_TEST(TransposeTest, Large4D) +{ + Check( + /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{2, 0, 1, 3}, + /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, + 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, + 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49, + 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109, + 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, + 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, + 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, + 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}); +} + +TYPED_TEST(TransposeTest, Large2D) +{ + Check( + /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{1, 0}, + /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, + 61, 73, 85, 97, 109, 2, 14, 26, 38, 
50, 62, 74, 86, 98, 110, + 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52, + 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113, + 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, + 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, + 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58, + 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp new file mode 100644 index 0000000..1b5f9d9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TransposeConv.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams ¶ms) + : KernelWithParams({output_shape, filter, input, bias}, + {output, scratch_tensor}, params) +{ +} + +TransposeConv::~TransposeConv() +{ + // Define destructor here, to delete vector of qunatized multipliers properly +} + +void TransposeConv::configure() +{ + assert(output_shape()->shape().num_dims() == 1); + assert(input()->shape().num_dims() == 4); + assert(filter()->shape().num_dims() == 4); + assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 || + input()->element_type() == DataType::S16); + assert(input()->element_type() == output()->element_type()); + assert(input()->shape().dim(3) == filter()->shape().dim(3)); + + const int num_dims = output_shape()->shape().dim(0); + Shape out_shape(num_dims); + const auto *shape_data = getTensorData(output_shape()); + for (int i = 0; i < num_dims; i++) + out_shape.dim(i) = shape_data[i]; + output()->resize(out_shape); + + const int32_t filter_height = filter()->shape().dim(1); + const int32_t filter_width = filter()->shape().dim(2); + const int32_t output_height = out_shape.dim(1); + const int32_t output_width = out_shape.dim(2); + + const int32_t unused_output_height = + computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1); + const int32_t unused_output_width = + computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1); + + _padding_height = + computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height); + _padding_width = + computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width); + + if 
(input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->resize(output()->shape()); + const std::vector real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + _quant_multipliers = quantizeMultipliers(real_multipliers); + } + else + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->set_allocatable(false); + } +} + +void TransposeConv::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void TransposeConv::evalFloat() const +{ + tflite::ConvParams op_params{}; + op_params.padding_type = tflite::PaddingType::kSame; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.stride_height = params().stride_height; + op_params.stride_width = params().stride_width; + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData(input()), // + getTensorShape(filter()), getTensorData(filter()), // + getTensorShape(bias()), getTensorData(bias()), // + getTensorShape(output()), getTensorData(output()), // + tflite::RuntimeShape(), nullptr); +} + +void TransposeConv::evalQuantized() const +{ + tflite::ConvParams op_params{}; + op_params.padding_type = tflite::PaddingType::kSame; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.stride_height = 
params().stride_height; + op_params.stride_width = params().stride_width; + // The kernel expects input and filter zero points to be negated. + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.weights_offset = -filter()->zero_point(); // Note the '-'. + op_params.output_offset = output()->zero_point(); + op_params.output_multiplier = _quant_multipliers[0].multiplier; + op_params.output_shift = _quant_multipliers[0].shift; + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + + auto scratch_tensor = getOutputTensors()[1]; + + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData(input()), // + getTensorShape(filter()), getTensorData(filter()), // + getTensorShape(bias()), getTensorData(bias()), // + getTensorShape(output()), getTensorData(output()), // + tflite::RuntimeShape(), nullptr, // + getTensorData(scratch_tensor)); +} + +void TransposeConv::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = 
_params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t)); + + BroadcastableWrapper output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast(input_val - input()->zero_point()) * + static_cast(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, 
output_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void TransposeConv::evalQuantizedS16() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t)); + + BroadcastableWrapper output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin 
= in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast(input_val) * static_cast(filter_val); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h new file mode 100644 index 0000000..cea0cf3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H +#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ChannelQuantMultipliers; + +class TransposeConv : public KernelWithParams +{ +public: + TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams ¶ms); + + ~TransposeConv(); + + const Tensor *output_shape() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *input() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. 
+ std::vector _quant_multipliers; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp new file mode 100644 index 0000000..4856e1b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TransposeConv.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(std::initializer_list output_shape_shape, + std::initializer_list weight_shape, std::initializer_list input_shape, + std::initializer_list bias_shape, std::initializer_list output_shape, + std::initializer_list output_shape_data, std::initializer_list weight_data, + std::initializer_list input_data, std::initializer_list bias_data, + std::initializer_list output_data, luci::Padding padding, int32_t stride_height, + int32_t stride_width) +{ + std::unique_ptr memory_manager = std::make_unique(); + + constexpr DataType element_type = getElementType(); + Tensor output_shape_tensor = + makeInputTensor(output_shape_shape, output_shape_data, memory_manager.get()); + Tensor weight_tensor = + makeInputTensor(weight_shape, weight_data, memory_manager.get()); + Tensor input_data_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + + DataType scratch_data_type = element_type == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(element_type); + + TransposeConvParams params{}; + params.padding = padding; + params.stride_height = stride_height; + params.stride_width = stride_width; + + if (bias_data.size() != 0) + { + Tensor bias_tensor = + makeInputTensor()>(bias_shape, bias_data, memory_manager.get()); + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + } + else + { + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + } + EXPECT_THAT(extractTensorData(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +TEST(TransposeConvTest, FloatSimple) +{ + Check( + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*bias_data=*/{}, + /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); + + SUCCEED(); +} + +TEST(TransposeConvTest, FloatTwoFiltersTest) +{ + Check( + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, + /*input_data=*/{1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + /*bias_data=*/{}, + /*output_data=*/ + {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); + + SUCCEED(); +} + +TEST(TransposeConvTest, SimpleBiasTest) +{ + Check( + /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, + /*input_shape=*/{1, 2, 2, 1}, + /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2}, + /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + /*input_data=*/{1, 2, 3, 4}, + /*bias_data=*/{3, 4}, + /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, + 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, + 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, + /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2); + + SUCCEED(); +} + +TEST(TransposeConvTest, UInt8) +{ + std::unique_ptr memory_manager = std::make_unique(); + + std::vector input_data{1, 2, 3, 4}; + std::vector filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector bias_data{3, 4}; + std::vector output_shape_data{1, 5, 5, 2}; + std::vector ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. 
+ auto input_quant = quantizationParams(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto filter_quant = quantizationParams(-24.0, 39.75); // s = 1 / 4, zp = 96 + auto output_quant = quantizationParams(-64.0, 191.0); // s = 1, zp = 64 + + Tensor input_tensor = makeInputTensor( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor( + {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor({2}, input_quant.first * filter_quant.first, + 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, UInt8_CWQ) +{ + std::unique_ptr memory_manager = std::make_unique(); + + const int32_t output_channels = 2; + std::vector input_data{1, 2, 3, 4}; + std::vector filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector bias_data{3, 4}; + std::vector output_shape_data{1, 5, 5, 2}; + std::vector ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 
12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto output_quant = quantizationParams(-64.0, 191.0); // s = 1, zp = 64 + + std::vector> filter_quant_params; + filter_quant_params.push_back(quantizationParams(0, 17)); + filter_quant_params.push_back(quantizationParams(0, 18)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor( + {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor({output_channels}, bias_scales, zerop, 0, + bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16) +{ + std::unique_ptr memory_manager = std::make_unique(); + + std::vector input_data{1, 2, 3, 4}; + std::vector filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector bias_data{3, 4}; + std::vector output_shape_data{1, 5, 5, 2}; + std::vector ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + Tensor input_tensor = + makeInputTensor({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get()); + Tensor filter_tensor = + makeInputTensor({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16_CWQ_weights) +{ + std::unique_ptr memory_manager = std::make_unique(); + + const int output_channels = 2; + const Shape input_shape{1, 2, 2, 1}; + const Shape filter_shape{output_channels, 3, 3, 1}; + const Shape bias_shape{output_channels}; + std::vector output_shape_data{1, 5, 5, output_channels}; + + std::vector input_data{1, 2, 3, 4}; + std::vector filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector bias_data{3, 4}; + + std::vector ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + const float input_scale = 0.25; + const float output_scale = 0.5; + const std::vector filter_scales{0.2f, 0.5f}; + std::vector bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale}; + const std::vector zerop(2, 0); + + Tensor input_tensor = + makeInputTensor(input_shape, input_scale, 0, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor(filter_shape, filter_scales, zerop, 0, + filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data, + 
memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp new file mode 100644 index 0000000..9127241 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Unpack.h" + +#include "kernels/Utils.h" + +#include + +#include + +namespace luci_interpreter +{ + +namespace kernels +{ + +Unpack::Unpack(const Tensor *input, std::vector outputs, const UnpackParams ¶ms) + : KernelWithParams({input}, std::move(outputs), params) +{ +} + +void Unpack::configure() +{ + const Shape &input_shape = input()->shape(); + + int axis = _params.axis; + if (axis < 0) + axis += input()->shape().num_dims(); + assert(axis >= 0 && axis < input_shape.num_dims()); + + Shape output_shape(input_shape.num_dims() - 1); + int out_index = 0; + for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index) + { + if (in_index != axis) + output_shape.dim(out_index++) = input_shape.dim(in_index); + } + + for (Tensor *output : _outputs) + { + assert(output->element_type() == input()->element_type()); + output->resize(output_shape); + } +} + +template void Unpack::executeImpl() const +{ + tflite::UnpackParams params{}; + params.axis = _params.axis; + params.num_split = _outputs.size(); + VectorOfTensors all_outputs(_outputs); + tflite::reference_ops::Unpack(params, getTensorShape(input()), getTensorData(input()), + **all_outputs.shapes(), all_outputs.data()); +} + +void Unpack::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + return executeImpl(); + case DataType::U8: + return executeImpl(); + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h new file mode 100644 index 0000000..f4a44ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H +#define LUCI_INTERPRETER_KERNELS_UNPACK_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Unpack : public KernelWithParams +{ +public: + Unpack(const Tensor *input, std::vector outputs, const UnpackParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + template void executeImpl() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp new file mode 100644 index 0000000..9384ddc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Unpack.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template +void Check(int axis, Shape input_shape, std::initializer_list input_data, + const std::vector> &exp_output_shape, + std::vector> exp_output_data) +{ + std::unique_ptr memory_manager = std::make_unique(); + constexpr DataType element_type = getElementType(); + const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis); + + Tensor input_tensor = + makeInputTensor(input_shape, input_data, memory_manager.get()); + std::vector output_tensors; + output_tensors.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) + { + output_tensors.push_back(makeOutputTensor(element_type)); + } + + std::vector output_tensor_ptrs(num_outputs); + for (int i = 0; i < num_outputs; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + UnpackParams params{}; + params.axis = axis; + + Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params); + kernel.configure(); + for (int i = 0; i < num_outputs; i++) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_outputs; ++i) + { + EXPECT_THAT(extractTensorData(output_tensors[i]), + ::testing::ElementsAreArray(exp_output_data[i])); + } +} + +template class UnpackTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(UnpackTest, DataTypes); + 
+TYPED_TEST(UnpackTest, ThreeOutputs) +{ + Check(/*axis=*/0, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{2}, {2}, {2}}, + /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeOutputsAxisOne) +{ + Check(/*axis=*/1, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{3}, {3}}, + /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne) +{ + Check(/*axis=*/-1, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{3}, {3}}, + /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo) +{ + Check(/*axis=*/-2, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{2}, {2}, {2}}, + /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}}); +} + +TYPED_TEST(UnpackTest, OneOutput) +{ + Check(/*axis=*/0, /*input_shape=*/{1, 6}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{6}}, + /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs) +{ + Check(/*axis=*/2, /*input_shape=*/{2, 2, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8}, + /*exp_output_shape=*/{{2, 2}, {2, 2}}, + /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}}); +} + +TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs) +{ + Check( + /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}}, + /*exp_output_data=*/ + {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}}); +} + +TYPED_TEST(UnpackTest, VectorToScalar) +{ + Check(/*axis=*/0, /*input_shape=*/{5}, + /*input_data=*/{1, 2, 3, 4, 5}, + /*exp_output_shape=*/{{}, {}, {}, {}, {}}, + /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}}); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp new file mode 100644 index 0000000..5d8e5db --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Utils.h" + +#include +#include +#include +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +template +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max) +{ + switch (activation) + { + case Activation::NONE: + *activation_min = std::numeric_limits::lowest(); + *activation_max = std::numeric_limits::max(); + break; + case Activation::RELU: + *activation_min = 0; + *activation_max = std::numeric_limits::max(); + break; + case Activation::RELU_N1_TO_1: + *activation_min = -1; + *activation_max = 1; + break; + case Activation::RELU6: + *activation_min = 0; + *activation_max = 6; + break; + default: + throw std::runtime_error("Unsupported activation."); + } +} + +template void calculateActivationRange(Activation activation, float *activation_min, + float *activation_max); +template void calculateActivationRange(Activation activation, int32_t *activation_min, + int32_t *activation_max); +template void calculateActivationRange(Activation 
activation, int64_t *activation_min, + int64_t *activation_max); + +static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax, + const Tensor *output, int32_t *activation_min, + int32_t *activation_max) +{ + const float scale = output->scale(); + const int32_t zero_point = output->zero_point(); + + auto quantize = [scale, zero_point](float x) { + return zero_point + static_cast(std::round(x / scale)); + }; + + switch (activation) + { + case Activation::NONE: + case Activation::TANH: + *activation_min = qmin; + *activation_max = qmax; + break; + case Activation::RELU: + *activation_min = std::max(qmin, quantize(0.0f)); + *activation_max = qmax; + break; + case Activation::RELU_N1_TO_1: + *activation_min = std::max(qmin, quantize(-1.0f)); + *activation_max = std::min(qmax, quantize(1.0f)); + break; + case Activation::RELU6: + *activation_min = std::max(qmin, quantize(0.0f)); + *activation_max = std::min(qmax, quantize(6.0f)); + break; + default: + throw std::runtime_error("Unsupported activation."); + } +} + +void calculateActivationRangeQuantized(Activation activation, const Tensor *output, + int32_t *activation_min, int32_t *activation_max) +{ + assert(output->zero_points().size() == 1); + int32_t qmin{}; + int32_t qmax{}; + switch (output->element_type()) + { + case DataType::U8: + qmin = 0; + qmax = std::numeric_limits::max(); + break; + case DataType::S8: + qmin = -std::numeric_limits::max(); + qmax = std::numeric_limits::max(); + break; + case DataType::S16: + // For now, assume that signed int16 type implies signed symmetric quantization. 
/**
 * Decomposes a real multiplier into a Q0.31 fixed-point significand and a
 * power-of-two exponent, so that
 *   double_multiplier ~= quantized_multiplier * 2^(shift - 31).
 *
 * @param double_multiplier    multiplier to decompose
 * @param quantized_multiplier receives the Q0.31 significand
 * @param shift                receives the exponent (negative means right shift)
 */
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  // frexp yields q in [0.5, 1) with double_multiplier == q * 2^shift.
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));

  // Rounding can push the significand up to exactly 2^31, which does not fit in
  // Q0.31; halve it and compensate in the exponent.
  if (q_fixed == (INT64_C(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  if (*shift < -31)
  {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

/**
 * Same decomposition as quantizeMultiplier, restricted to 0 < multiplier < 1.
 *
 * @param double_multiplier    multiplier in the open interval (0, 1)
 * @param quantized_multiplier receives the Q0.31 significand
 * @param left_shift           receives the exponent, asserted to be <= 0
 */
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift)
{
  assert(double_multiplier < 1.0);
  assert(double_multiplier > 0.0);
  int shift;
  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  assert(shift <= 0);
  *left_shift = shift;
}
// Throws std::runtime_error("<file>:<line>(<cond>) was not true.") when `cond` is false.
//
// The body is wrapped in do { ... } while (false) so that the internal `if`
// cannot capture an `else` that follows the macro invocation. The trailing
// semicolon is kept on purpose: existing call sites invoke the macro both with
// and without their own semicolon.
#define LUCI_INTERPRETER_CHECK(cond)                                                    \
  do                                                                                    \
  {                                                                                     \
    if (!(cond))                                                                        \
      throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
                               "(" + std::string(#cond) + ") was not true.");           \
  } while (false);

/**
 * Computes the leading padding for one spatial dimension of a convolution-like op.
 *
 * @param stride        stride along the dimension
 * @param dilation_rate dilation along the dimension
 * @param in_size       input extent
 * @param filter_size   filter extent (before dilation)
 * @param out_size      output extent
 * @return half of the total padding required, clamped to be non-negative
 */
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                              int32_t filter_size, int32_t out_size)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  return padding > 0 ? padding : 0;
}
total_padding : 0; + *offset = total_padding % 2; + return total_padding / 2; +} + +inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, + int32_t stride, int32_t dilation_rate = 1) +{ + const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding) + { + case Padding::SAME: + return (image_size + stride - 1) / stride; + case Padding::VALID: + return (image_size + stride - effective_filter_size) / stride; + default: + assert(false); + return 0; + } +} + +inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3) +{ + return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3; +} + +template +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max); + +void calculateActivationRangeQuantized(Activation activation, const Tensor *output, + int32_t *activation_min, int32_t *activation_max); + +template constexpr bool one_of_types() { return false; } + +// Checks if T is equal to one of {U,Other} types +template constexpr bool one_of_types() +{ + return std::is_same::value || one_of_types(); +} + +/** + * Fills activation min and max parameters depending on given data type and activation + * + * T is a template parameter, so after optimization this code left with only required if case + * + * @tparam T data type of arithmetic operation output tensor + * @param params tflite params to fill + * @param activation luci_interpreter::Activation of arithmetic operation + */ +template +void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act) +{ + static_assert(one_of_types(), "Unsupported dtype"); + + if (std::is_same::value) + calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max); + if (std::is_same::value) + calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max); + else + calculateActivationRange(act, &p.int64_activation_min, 
&p.int64_activation_max); +} + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Handles an arbitrary positive multiplier. The 'shift' output-value is +// basically the 'floating-point exponent' of the multiplier: +// Negative for a right-shift (when the multiplier is <1), positive for a +// left-shift (when the multiplier is >1) +void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of NEGATIVE its exponent --- +// this is intended as a RIGHT-shift. +// +// Restricted to the case where the multiplier < 1 (and non-negative). +void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift); + +Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape); + +inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, + float output_scale) +{ + const double input_product_scale = static_cast(input_scale * filter_scale); + LUCI_INTERPRETER_CHECK(input_product_scale >= 0); + return input_product_scale / static_cast(output_scale); +} + +// TODO rename getQuantizedConvolutionMultiplers to something more general +// it is used for non conv operators too +inline std::vector getQuantizedConvolutionMultiplers(float input_scale, + const std::vector &filter_scale, + float output_scale) +{ + std::vector effective_output_scales; + size_t n = filter_scale.size(); + effective_output_scales.reserve(n); + for (size_t i = 0; i < n; ++i) + { + effective_output_scales.push_back( + getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale)); + } + return effective_output_scales; +} + +struct ChannelQuantMultipliers +{ + int shift; + int32_t multiplier; + ChannelQuantMultipliers() = default; +}; + +inline std::vector 
// Helper wrapper to hide broadcast logic.
//
// Wraps a vector so that a single-element vector behaves as if its one value
// were repeated at every index, while longer vectors are indexed normally.
// The wrapper keeps a reference to the vector, which must outlive it.
template <typename T> class BroadcastableWrapper
{
public:
  // A stride of 0 makes every index map to element 0 (broadcast case).
  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}

  // Read-only access; const-qualified so a const wrapper can be indexed too.
  T operator[](int idx) const { return _v[idx * _stride]; }

private:
  const std::vector<T> &_v;
  int _stride;
};
+ for (tflite::RuntimeShape &shape : all_shape_) + { + all_shape_ptr_.push_back(&shape); + } + } + // Return a pointer to the data pointers of all tensors in the list. For + // example: + // float* const* f = v.data(); + // f[0][1] is the second element of the first tensor. + ElementT *const *data() const { return all_data_.data(); } + + // Return a pointer the shape pointers of all tensors in the list. For + // example: + // const RuntimeShape* const* d = v.dims(); + // dims[1] are the dimensions of the second tensor in the list. + const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); } + +private: + std::vector all_data_; + std::vector all_shape_; + std::vector all_shape_ptr_; +}; + +// A list of quantized tensors in a format that can be used by kernels like +// split and concatenation. +template class VectorOfQuantizedTensors : public VectorOfTensors +{ +public: + using typename VectorOfTensors::TensorT; + + // Build with the tensors in 'tensor_list'. + explicit VectorOfQuantizedTensors(const std::vector &tensor_list) + : VectorOfTensors(tensor_list) + { + for (TensorT *tensor : tensor_list) + { + zero_point_.push_back(tensor->zero_point()); + scale_.push_back(tensor->scale()); + } + } + + const float *scale() const { return scale_.data(); } + const int32_t *zero_point() const { return zero_point_.data(); } + +private: + std::vector zero_point_; + std::vector scale_; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_UTILS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp new file mode 100644 index 0000000..153bd1a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/While.h" +#include "kernels/Utils.h" + +#include + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +void copy(const std::vector &src, const std::vector &dst) +{ + for (size_t i = 0; i < src.size(); ++i) + { + LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type()); + dst[i]->resize(src[i]->shape()); + + const int32_t num_elements = src[i]->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(src[i]->element_type()); + std::memcpy(dst[i]->data(), src[i]->data(), num_elements * element_size); + } +} + +void copy(const std::vector &src, const std::vector &dst) +{ + std::vector const_src; + for (const auto &t : src) + const_src.push_back(t); + copy(const_src, dst); +} + +// TODO: Think about how allocate memory for output in main graph +void configureTensorsAllocations(const std::vector &tensors, RuntimeGraph *run_graph) +{ + for (auto tensor : tensors) + run_graph->configureAllocations(tensor); +} + +} // namespace + +While::While(std::vector inputs, std::vector outputs, + RuntimeGraph *cond_graph, RuntimeGraph *body_graph) + : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph) +{ +} + +void While::configure() +{ + LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size()); + LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == 
getOutputTensors().size()); + LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size()); + + LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size()); + + const auto &cond_outputs = _cond_graph->getOutputTensors(); + LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1) + LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL); +} + +/** + * @note Dynamic shape such as {1, 0, 8} may fail in tensor->data() + */ +void While::execute() const +{ + const auto &cond_inputs = _cond_graph->getInputTensors(); + const auto &cond_outputs = _cond_graph->getOutputTensors(); + + configureTensorsAllocations(cond_inputs, _cond_graph); + + copy(getInputTensors(), cond_inputs); + + const auto &body_inputs = _body_graph->getInputTensors(); + const auto &body_outputs = _body_graph->getOutputTensors(); + + configureTensorsAllocations(body_inputs, _body_graph); + + while (true) + { + _cond_graph->execute(); + + bool cond_value = cond_outputs[0]->data()[0]; + if (!cond_value) + break; + + copy(cond_inputs, body_inputs); + + _body_graph->execute(); + + copy(body_outputs, cond_inputs); + } + + copy(cond_inputs, getOutputTensors()); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.h b/compiler/luci-micro/luci-interpreter/src/kernels/While.h new file mode 100644 index 0000000..f758df3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H +#define LUCI_INTERPRETER_KERNELS_WHILE_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class While : public Kernel +{ +public: + While(std::vector inputs, std::vector outputs, RuntimeGraph *cond_graph, + RuntimeGraph *body_graph); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + RuntimeGraph *const _cond_graph = nullptr; + RuntimeGraph *const _body_graph = nullptr; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_WHILE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp new file mode 100644 index 0000000..cb8f891 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/RuntimeModule.h" +#include "kernels/Add.h" +#include "kernels/Less.h" +#include "kernels/While.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique(DataType::BOOL, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + graph->addKernel(std::make_unique(input, input_cond, output)); + + return graph; +} + +RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique(dtype, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + AddParams params{}; + params.activation = Activation::NONE; + 
graph->addKernel(std::make_unique(input, input_add, output, params)); + + return graph; +} + +TEST(WhileTest, FloatLoop10) +{ + std::unique_ptr memory_manager = std::make_unique(); + Tensor input = makeInputTensor({1}, {1}, memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + Tensor input_cond = makeInputTensor({1}, {10}, memory_manager.get()); + Tensor input_add = makeInputTensor({1}, {1}, memory_manager.get()); + + RuntimeModule module(nullptr); + RuntimeGraph *cond_graph = + buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get()); + RuntimeGraph *body_graph = + buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get()); + + While kernel({&input}, {&output}, cond_graph, body_graph); + kernel.configure(); + memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData(output), FloatArrayNear({10})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt new file mode 100644 index 0000000..2927715 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt @@ -0,0 +1,39 @@ +set(SOURCES + GraphLoader.h + GraphLoader.cpp + KernelBuilderHelper.h + KernelBuilderHelper.cpp + KernelBuilder.h + KernelBuilder.cpp + ModuleLoader.h + ModuleLoader.cpp + RuntimeToIR.h + nodes/Builders.h) + +# include kernel specific builders +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "nodes/${NODE}.cpp") +endmacro(REGISTER_KERNEL) +include(${KERNEL_REGISTER_FILE}) + +add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}") 
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") + +target_link_libraries(${LUCI_INTERPRETER_LOADER} + PUBLIC luci_lang ${LUCI_INTERPRETER_CORE} + PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +set(TEST_SOURCES KernelBuilder.test.cpp) + +GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER}) diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp new file mode 100644 index 0000000..4020709 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loader/GraphLoader.h" + +#include "loader/KernelBuilder.h" + +#include +#include + +namespace luci_interpreter +{ +namespace +{ + +template Shape getNodeShape(const NodeT *node) +{ + Shape shape(node->rank()); + for (uint32_t i = 0; i < node->rank(); ++i) + { + shape.dim(i) = node->dim(i).value(); + } + return shape; +} + +template const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size) +{ + const size_t element_size = getDataTypeSize(DT); + const int32_t num_elements = node->size
(); + + *data_size = num_elements * element_size; + if (*data_size > 0) + { + // FIXME There is no good way to get the pointer to the data currently. + return &node->at
(0); + } + return nullptr; +} + +const void *getNodeData(const luci::CircleConst *node, size_t *data_size) +{ + switch (node->dtype()) + { + case DataType::U8: + return getNodeDataImpl(node, data_size); + case DataType::FLOAT32: + return getNodeDataImpl(node, data_size); + case DataType::S8: + return getNodeDataImpl(node, data_size); + case DataType::S16: + return getNodeDataImpl(node, data_size); + case DataType::S32: + return getNodeDataImpl(node, data_size); + case DataType::S64: + return getNodeDataImpl(node, data_size); + case DataType::BOOL: + return getNodeDataImpl(node, data_size); + default: + throw std::runtime_error("Unsupported type."); + } +} + +const void *getNodeData(const luci::CircleCustom *node, size_t *data_size) +{ + if (node->custom_code() != "CircleReferencingConst") + return nullptr; + + // helper struct which describes data loaded to custom_options of CircleReferencingConst node + // TODO move this struct to header + struct ConstDataReference + { + const uint8_t *data = nullptr; + uint32_t size = 0; + }; + + const auto &custom_options = node->custom_options(); + const auto &const_data_ref = *reinterpret_cast(custom_options.data()); + + *data_size = const_data_ref.size; + return const_data_ref.data; +} + +bool isExecutableNode(const luci::CircleNode *node) +{ + switch (node->opcode()) + { + // These nodes denote inputs / outputs of a graph. + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEOUTPUT: + case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE: + // The following nodes denote outputs of multiple-output nodes. 
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT: + case luci::CircleOpcode::CIRCLESPLITOUT: + case luci::CircleOpcode::CIRCLESPLITVOUT: + case luci::CircleOpcode::CIRCLETOPKV2OUT: + case luci::CircleOpcode::CIRCLEUNIQUEOUT: + case luci::CircleOpcode::CIRCLEUNPACKOUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + case luci::CircleOpcode::CIRCLEWHILEOUT: + return false; + // Custom nodes may be executable and non-executable + case luci::CircleOpcode::CUSTOM: + { + auto const custom_node = loco::must_cast(node); + + // TODO handle more non-executable Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return false; + + return true; + } + default: + return true; + } +} + +bool isTensorProducingNode(const luci::CircleNode *node) +{ + switch (node->opcode()) + { + // Output nodes do not produce tensors. + case luci::CircleOpcode::CIRCLEOUTPUT: + // The following nodes are multiple-output nodes. They do not produce tensors, the tensors + // are produced by the corresponding *Out nodes instead. 
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM: + case luci::CircleOpcode::CUSTOM: + case luci::CircleOpcode::IF: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5: + case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::SPLIT_V: + case luci::CircleOpcode::TOPK_V2: + case luci::CircleOpcode::UNIQUE: + case luci::CircleOpcode::UNPACK: + case luci::CircleOpcode::WHILE: + return false; + default: + return true; + } +} + +bool isSupportedCustomNode(const luci::CircleNode *node) +{ + const auto custom_node = loco::must_cast(node); + + // TODO handle more Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return true; + + return false; +} + +} // namespace + +GraphLoader::GraphLoader( + const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, + const std::unordered_map &graph_to_runtime_graph, + std::unordered_map &node_to_tensor, IMemoryManager *memory_manager) + : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir), + _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor), + _memory_manager(memory_manager) +{ +} + +void GraphLoader::loadTensors() +{ + for (uint32_t i = 0; i < _graph->nodes()->size(); ++i) + { + const auto *node = loco::must_cast(_graph->nodes()->at(i)); + + if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) + throw std::runtime_error("Unsupported Custom operator. " + node->name()); + + if (!isTensorProducingNode(node)) + continue; + + // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will + // be inferred. 
+ Shape shape{}; + switch (node->opcode()) + { + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + shape = getNodeShape(node); + break; + default: + break; + } + + AffineQuantization quantization; + if (node->quantparam() != nullptr) + { + const luci::CircleQuantParam *params = node->quantparam(); + assert(params->scale.size() == params->zerop.size()); + quantization.scale.assign(params->scale.cbegin(), params->scale.cend()); + quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend()); + quantization.quantized_dimension = params->quantized_dimension; + } + + auto tensor = std::make_unique(node->dtype(), std::move(shape), std::move(quantization), + node->name()); + + // If node has execution plan then read memory offsets for nodes + // from the beginning of shared memory buffer. Used in Static Memory Manager. + if (luci::has_execution_plan(node)) + { + auto execution_plan = luci::get_execution_plan(node); + assert(!execution_plan.offsets().empty()); + tensor->set_offset(execution_plan.offsets().front()); + } + + if (const auto *const_node = dynamic_cast(node)) + { + size_t data_size{}; + const void *const_data = getNodeData(const_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } + else if (const auto *custom_out_node = dynamic_cast(node)) + { + const auto *custom_node = + loco::must_cast(custom_out_node->input()); + + if (custom_node->custom_code() == "CircleReferencingConst") + { + size_t data_size{}; + const void *const_data = getNodeData(custom_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } + } + + _node_to_tensor.emplace(node, tensor.get()); + _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node); + + 
_runtime_graph->addTensor(std::move(tensor)); + } +} + +void GraphLoader::initInputOutputTensors() const +{ + auto input_nodes = loco::input_nodes(_graph); + std::vector input_tensors(input_nodes.size()); + for (size_t i = 0; i < input_nodes.size(); ++i) + { + input_tensors[i] = _node_to_tensor.at(input_nodes[i]); + _memory_manager->allocate_memory(*input_tensors[i]); + } + _runtime_graph->setInputTensors(input_tensors); + + auto output_nodes = loco::output_nodes(const_cast(_graph)); + std::vector output_tensors(output_nodes.size()); + for (size_t i = 0; i < output_nodes.size(); ++i) + { + const auto *node = loco::must_cast(output_nodes[i]); + output_tensors[i] = _node_to_tensor.at(node->from()); + } + _runtime_graph->setOutputTensors(output_tensors); +} + +void GraphLoader::loadOperators() +{ + KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor); + + // Create kernels for executable nodes. This has to be done in execution order. + auto graph = const_cast(_graph); + + auto const graph_nodes = loco::all_nodes(graph); + + // Checking for execution plan in node annotations. + bool has_execution_annotation = true; + auto const checking_exec_plan = [&has_execution_annotation](auto const node) { + const auto *circle_node = loco::must_cast(node); + if (!luci::has_execution_plan(circle_node)) + has_execution_annotation = false; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan); + + if (has_execution_annotation) + { + // Build ordered_nodes vector that stores the order of execution of graph nodes. 
+ std::vector ordered_nodes(graph_nodes.size()); + + auto const filler = [&ordered_nodes](auto const node) { + const auto *circle_node = loco::must_cast(node); + auto const position = luci::get_execution_plan(circle_node).order_in_plan(); + ordered_nodes.at(position) = circle_node; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), filler); + + for (auto node : ordered_nodes) + { + if (isExecutableNode(node)) + { + std::unique_ptr kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } + else + { + // If it is impossible to build the execution order plan, + // then we use the default postorder_traversal approach. + for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph))) + { + const auto *node = loco::must_cast(loco_node); + if (isExecutableNode(node)) + { + std::unique_ptr kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h new file mode 100644 index 0000000..fe066ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H +#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H + +#include "core/RuntimeGraph.h" +#include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" + +#include + +#include + +namespace luci_interpreter +{ + +class GraphLoader +{ +public: + GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, + const std::unordered_map &graph_to_runtime_graph, + std::unordered_map &node_to_tensor, + IMemoryManager *memory_manager); + + void loadTensors(); + void initInputOutputTensors() const; + void loadOperators(); + +private: + const loco::Graph *_graph; + RuntimeGraph *_runtime_graph; + RuntimeToIR &_runtime_to_ir; + IMemoryManager *_memory_manager; + + const std::unordered_map &_graph_to_runtime_graph; + std::unordered_map &_node_to_tensor; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp new file mode 100644 index 0000000..8483a9a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loader/KernelBuilder.h" +#include "loader/nodes/Builders.h" + +#include + +namespace luci_interpreter +{ + +#define CIRCLE_NODE(OPCODE, CLASS) CLASS, +#define CIRCLE_VNODE(OPCODE, CLASS) CLASS, + +// This enum is auxiliary. +// It is duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE, +// because list of target operators is in format of CLASS names +enum class BuilderId +{ +#include + Size // casts to count of values in BuilderId enum +}; + +#undef CIRCLE_VNODE +#undef CIRCLE_NODE + +/** + * @brief Registry of kernel builders + * + * This class contains mapping from Opcodes to kernel builder functions + */ + +class KernelBuilderRegistry +{ +public: + using KernelBuilderFunc = std::unique_ptr(const luci::CircleNode *, + KernelBuilderHelper &); + + KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr) + { +#define REGISTER_KERNEL(name) \ + register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + } + + KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const + { + return _operator_builders.at(size_t(opcode)); + } + +private: + std::vector _operator_builders; + + void register_kernel_builder(BuilderId id, KernelBuilderFunc *func) + { + // Using BuilderId is a duplicate of luci::CirclreOpcode, + // size_t(id) is equal to size_t(corresponding operation opcode). + assert(size_t(id) < _operator_builders.size()); + _operator_builders[size_t(id)] = func; + } +}; + +KernelBuilder::KernelBuilder( + const std::unordered_map &graph_to_runtime_graph, + const std::unordered_map &node_to_tensor) + : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) +{ + _builder_registry = std::make_unique(); +} + +KernelBuilder::~KernelBuilder() +{ + // Need to define in this CPP to hide KernelBuilderRegistry internals. 
+ // This destructor deletes _builder_registry +} + +std::unique_ptr KernelBuilder::build(const luci::CircleNode *node) +{ + auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode()); + if (specific_builder != nullptr) + return specific_builder(node, *this); + + std::string msg = "Unsupported operator: "; + msg += std::to_string(static_cast(node->opcode())) + " " + std::string(node->name()); + throw std::invalid_argument(msg.c_str()); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h new file mode 100644 index 0000000..b1f3833 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H +#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H + +#include "loader/KernelBuilderHelper.h" + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +#include + +#include +#include + +namespace luci_interpreter +{ + +class KernelBuilderRegistry; + +class KernelBuilder : public KernelBuilderHelper +{ +public: + KernelBuilder( + const std::unordered_map &graph_to_runtime_graph, + const std::unordered_map &node_to_tensor); + + ~KernelBuilder(); + + std::unique_ptr build(const luci::CircleNode *node); + +private: + std::unique_ptr _builder_registry; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp new file mode 100644 index 0000000..b221b69 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -0,0 +1,1376 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loader/GraphLoader.h" +#include "loader/KernelBuilder.h" +#include "luci_interpreter/SimpleMemoryManager.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +class KernelBuilderTest : public Test +{ +protected: + luci::CircleInput *createInputNode() { return createNode(); } + void SetUp() override { _memory_manager = std::make_unique(); } + + std::unique_ptr _memory_manager; + + template NodeT *createNode(Args &&... args) + { + auto *node = _graph.nodes()->create(std::forward(args)...); + // The actual type does not matter for the purpose of the tests. + // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry + // actual output types). 
+ node->dtype(loco::DataType::FLOAT32); + return node; + } + + template NodeOutT *createNodeOut(loco::Node *node, int index) + { + auto *node_out = createNode(); + node_out->input(node); + node_out->index(index); + return node_out; + } + + template std::unique_ptr buildKernel(const luci::CircleNode *op) + { + std::unordered_map graph_to_runtime_graph; + + RuntimeGraph runtime_graph(nullptr, _memory_manager.get()); + graph_to_runtime_graph[&_graph] = &runtime_graph; + RuntimeToIR runtime_to_ir; + GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph, + _node_to_tensor, _memory_manager.get()); + graph_loader.loadTensors(); + + KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor); + + auto kernel = kernel_builder.build(op); + return std::unique_ptr(dynamic_cast(kernel.release())); + } + + void checkTensor(const Tensor *tensor, const loco::Node *node) + { + EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node))); + } + +private: + loco::Graph _graph; + std::unordered_map _node_to_tensor; +}; + +TEST_F(KernelBuilderTest, Add) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, ArgMax) +{ + auto *input = createInputNode(); + auto *axis = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->dimension(axis); + + op->output_type(loco::DataType::FLOAT32); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(), op); + 
EXPECT_THAT(kernel->params().output_type, Eq(op->output_type())); +} + +TEST_F(KernelBuilderTest, AveragePool2D) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, BatchMatMul) +{ + auto *lhs = createInputNode(); + auto *rhs = createInputNode(); + + auto *op = createNode(); + op->x(lhs); + op->y(rhs); + op->adj_x(false); + op->adj_y(false); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), lhs); + checkTensor(kernel->y(), rhs); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x())); + EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y())); +} + +TEST_F(KernelBuilderTest, Cast) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Concatenation) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(2); + op->values(0, input1); + op->values(1, input2); + op->axis(11); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, 
NotNull()); + + checkTensor(kernel->input(0), input1); + checkTensor(kernel->input(1), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Conv2D) +{ + auto *input = createInputNode(); + auto *filter = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->filter(filter); + op->bias(bias); + + op->padding(luci::Padding::SAME); + op->stride()->h(11); + op->stride()->w(13); + op->dilation()->h(17); + op->dilation()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h())); + EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, DepthToSpace) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->input(input); + + op->block_size(11); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().block_size, Eq(op->block_size())); +} + +TEST_F(KernelBuilderTest, DepthwiseConv2D) +{ + auto *input = createInputNode(); + auto *filter = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->filter(filter); + op->bias(bias); + + 
op->padding(luci::Padding::SAME); + op->depthMultiplier(11); + op->stride()->h(13); + op->stride()->w(17); + op->dilation()->h(19); + op->dilation()->w(23); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h())); + EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Div) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Elu) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->features(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Exp) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); 
+} + +TEST_F(KernelBuilderTest, Floor) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, FloorDiv) +{ + auto *x = createInputNode(); + auto *y = createInputNode(); + + auto *op = createNode(); + op->x(x); + op->y(y); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x); + checkTensor(kernel->y(), y); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Equal) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, FullyConnected) +{ + auto *input = createInputNode(); + auto *weights = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->weights(weights); + op->bias(bias); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->weights(), weights); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Greater) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, GreaterEqual) +{ + auto 
*x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, InstanceNorm) +{ + auto *input = createInputNode(); + auto *gamma = createInputNode(); + auto *beta = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->gamma(gamma); + op->beta(beta); + + op->epsilon(1e-05); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->gamma(), gamma); + checkTensor(kernel->beta(), beta); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, L2Normalize) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, L2Pool2D) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, 
Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, LeakyRelu) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->features(input); + + op->alpha(11.0f); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().alpha, Eq(op->alpha())); +} + +TEST_F(KernelBuilderTest, Less) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LessEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LocalResponseNormalization) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->input(input); + + op->radius(11); + op->bias(13.0f); + op->alpha(15.0f); + op->beta(17.0f); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().radius, Eq(op->radius())); + EXPECT_THAT(kernel->params().bias, Eq(op->bias())); + EXPECT_THAT(kernel->params().alpha, Eq(op->alpha())); + EXPECT_THAT(kernel->params().beta, Eq(op->beta())); +} + 
+TEST_F(KernelBuilderTest, LogicalAnd) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalNot) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalOr) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Logistic) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogSoftmax) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->logits(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Maximum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, MaxPool2D) +{ + auto *input = 
createInputNode(); + + auto *op = createNode(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Mean) +{ + auto *input = createInputNode(); + auto *axes = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->reduction_indices(axes); + + op->keep_dims(true); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axes(), axes); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims())); +} + +TEST_F(KernelBuilderTest, Minimum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Mul) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + 
checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Neg) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, NotEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, OneHot) +{ + auto *indices = createInputNode(); + auto *depth = createInputNode(); + auto *on_value = createInputNode(); + auto *off_value = createInputNode(); + auto axis = 1; + + auto *op = createNode(); + op->indices(indices); + op->depth(depth); + op->on_value(on_value); + op->off_value(off_value); + op->axis(axis); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->indices(), indices); + checkTensor(kernel->depth(), depth); + checkTensor(kernel->on_value(), on_value); + checkTensor(kernel->off_value(), off_value); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + +TEST_F(KernelBuilderTest, Pad) +{ + auto *input = createInputNode(); + auto *paddings = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->paddings(paddings); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->paddings(), paddings); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, PadV2) +{ + auto *input = createInputNode(); + auto *paddings = createInputNode(); + auto *constant_values = createInputNode(); + + auto *op = 
createNode(); + op->input(input); + op->paddings(paddings); + op->constant_values(constant_values); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->paddings(), paddings); + checkTensor(kernel->constant_values(), constant_values); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Pow) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, PRelu) +{ + auto *input = createInputNode(); + auto *alpha = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->alpha(alpha); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->alpha(), alpha); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->features(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu6) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->features(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Reshape) +{ + auto *input = createInputNode(); + auto *shape = createInputNode(); + + auto *op = createNode(); + op->tensor(input); + op->shape(shape); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->shape(), shape); + checkTensor(kernel->output(), 
op); +} + +TEST_F(KernelBuilderTest, ResizeBilinear) +{ + auto *input = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->size(size); + op->align_corners(true); + op->half_pixel_centers(true); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers())); +} + +TEST_F(KernelBuilderTest, ResizeNearestNeighbor) +{ + auto *input = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->size(size); + op->align_corners(true); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + // TODO currently half_pixel_centers are not implemented on CircleResizeNearestNeighbor + // after adding, need to be updated. 
+} + +TEST_F(KernelBuilderTest, ReverseV2) +{ + auto *input = createInputNode(); + auto *axes = createInputNode(); + + auto *op = createNode(); + op->tensor(input); + op->axis(axes); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axes(), axes); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Rsqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Slice) +{ + auto *input = createInputNode(); + auto *begin = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->begin(begin); + op->size(size); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->begin(), begin); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Softmax) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->logits(input); + + op->beta(11.0f); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().beta, Eq(op->beta())); +} + +TEST_F(KernelBuilderTest, SpaceToDepth) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->input(input); + + op->block_size(11); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().block_size, op->block_size()); +} + +TEST_F(KernelBuilderTest, Split) +{ + auto *axis = createInputNode(); + auto *input = createInputNode(); + auto *op = createNode(); + auto *output1 = createNodeOut(op, 0); + auto 
*output2 = createNodeOut(op, 1); + + op->split_dim(axis); + op->input(input); + + op->num_split(2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->axis(), axis); + checkTensor(kernel->input(), input); + checkTensor(kernel->output(0), output1); + checkTensor(kernel->output(1), output2); +} + +TEST_F(KernelBuilderTest, SplitV) +{ + auto *input = createInputNode(); + auto *size_splits = createInputNode(); + auto *axis = createInputNode(); + auto *op = createNode(); + auto *output0 = createNodeOut(op, 0); + auto *output1 = createNodeOut(op, 1); + + op->input(input); + op->size_splits(size_splits); + op->split_dim(axis); + + op->num_split(2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size_splits(), size_splits); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(0), output0); + checkTensor(kernel->output(1), output1); +} + +TEST_F(KernelBuilderTest, Sqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, SquaredDifference) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Squeeze) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->input(input); + + op->squeeze_dims({11, 13}); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().squeeze_dims, 
ElementsAreArray(op->squeeze_dims())); +} + +TEST_F(KernelBuilderTest, StridedSlice) +{ + auto *input = createInputNode(); + auto *begin = createInputNode(); + auto *end = createInputNode(); + auto *strides = createInputNode(); + + auto *op = createNode(); + op->input(input); + op->begin(begin); + op->end(end); + op->strides(strides); + + op->begin_mask(11); + op->ellipsis_mask(13); + op->end_mask(17); + op->new_axis_mask(19); + op->shrink_axis_mask(23); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->begin(), begin); + checkTensor(kernel->end(), end); + checkTensor(kernel->strides(), strides); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask())); + EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask())); + EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask())); + EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask())); + EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask())); +} + +TEST_F(KernelBuilderTest, Sub) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Tanh) +{ + auto *input = createInputNode(); + + auto *op = createNode(); + op->x(input); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Transpose) +{ + auto *input = createInputNode(); + auto *perm = createInputNode(); + + auto *op = createNode(); + 
op->a(input); + op->perm(perm); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->perm(), perm); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, TransposeConv) +{ + auto *output_shape = createInputNode(); + auto *filter = createInputNode(); + auto *input = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode(); + op->inputSizes(output_shape); + op->filter(filter); + op->outBackprop(input); + op->bias(bias); + + op->padding(luci::Padding::SAME); + op->stride()->h(11); + op->stride()->w(13); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->output_shape(), output_shape); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + checkTensor(kernel->bias(), bias); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); +} + +TEST_F(KernelBuilderTest, Unpack) +{ + auto *input = createInputNode(); + auto *op = createNode(); + auto *output1 = createNodeOut(op, 0); + auto *output2 = createNodeOut(op, 1); + + op->value(input); + + op->num(2); + op->axis(11); + + auto kernel = buildKernel(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(0), output1); + checkTensor(kernel->output(1), output2); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + +TEST_F(KernelBuilderTest, NonExisting1_NEG) +{ + auto *op = createNode(); + ASSERT_ANY_THROW(buildKernel(op)); +} + +TEST_F(KernelBuilderTest, NonExisting2_NEG) +{ + auto *op = createNode(); + ASSERT_ANY_THROW(buildKernel(op)); +} + +TEST_F(KernelBuilderTest, NonExisting3_NEG) +{ + auto *op = createNode(); + ASSERT_ANY_THROW(buildKernel(op)); +} + +} // namespace +} // namespace luci_interpreter diff 
--git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp new file mode 100644 index 0000000..23c96a6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loader/KernelBuilderHelper.h" + +#include + +namespace luci_interpreter +{ + +const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const +{ + const Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const +{ + if (dynamic_cast(node)) + { + return nullptr; + } + return getInputTensor(node); +} + +Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const +{ + Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +std::vector +KernelBuilderHelper::getOutputTensors(const std::vector &nodes) const +{ + std::vector tensors; + tensors.reserve(nodes.size()); + for (const loco::Node *node : nodes) + tensors.push_back(getOutputTensor(node)); + return tensors; +} + +RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const +{ + RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); + 
assert(runtime_graph != nullptr); + return runtime_graph; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h new file mode 100644 index 0000000..d6fb253 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H +#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +#include +#include + +#include +#include + +namespace luci_interpreter +{ + +class KernelBuilderHelper +{ +public: + KernelBuilderHelper( + const std::unordered_map &graph_to_runtime_graph, + const std::unordered_map &node_to_tensor) + : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) + { + } + +public: + const Tensor *getInputTensor(const loco::Node *node) const; + const Tensor *getOptionalInputTensor(const loco::Node *node) const; + + Tensor *getOutputTensor(const loco::Node *node) const; + std::vector getOutputTensors(const std::vector &nodes) const; + + RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const; + +public: + const std::unordered_map &graph_to_runtime_graph() const + { + return _graph_to_runtime_graph; + } + + const std::unordered_map &node_to_tensor() const + { + return _node_to_tensor; + } + +private: + const std::unordered_map &_graph_to_runtime_graph; + const std::unordered_map &_node_to_tensor; +}; + +template +std::vector collectOutputNodes(const loco::Node *node) +{ + std::vector output_nodes; + for (const loco::Node *loco_node : loco::succs(node)) + { + output_nodes.push_back(loco::must_cast(loco_node)); + } + std::sort(output_nodes.begin(), output_nodes.end(), + [](const CircleNodeOut *node1, const CircleNodeOut *node2) { + return node1->index() < node2->index(); + }); + return {output_nodes.cbegin(), output_nodes.cend()}; +} + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp new file mode 100644 index 0000000..2f278b0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp @@ -0,0 +1,53 @@ +/* + * Copyright 
(c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ModuleLoader.h" + +#include "GraphLoader.h" + +namespace luci_interpreter +{ + +ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, + RuntimeToIR &runtime_to_ir, + std::unordered_map &node_to_tensor, + IMemoryManager *memory_manager) + : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir), + _node_to_tensor(node_to_tensor), _memory_manager(memory_manager) +{ +} + +void ModuleLoader::load() +{ + // Runtime graphs have to be created in advance, because they will be needed during the loading + // process for control flow nodes. 
+ for (size_t i = 0; i < _module->size(); ++i) + { + _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager)); + } + for (size_t i = 0; i < _module->size(); ++i) + { + const loco::Graph *graph = _module->graph(i); + RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); + GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph, + _node_to_tensor, _memory_manager); + loader.loadTensors(); + loader.initInputOutputTensors(); + loader.loadOperators(); + } +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h new file mode 100644 index 0000000..11326a2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H +#define LUCI_INTERPRETER_LOADER_MODULELOADER_H + +#include "core/RuntimeModule.h" +#include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" + +#include + +#include + +namespace luci_interpreter +{ + +class ModuleLoader +{ +public: + ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, + RuntimeToIR &runtime_to_ir, + std::unordered_map &node_to_tensor, + IMemoryManager *memory_manager); + + void load(); + +private: + IMemoryManager *_memory_manager; + const luci::Module *_module; + RuntimeModule *_runtime_module; + RuntimeToIR &_runtime_to_ir; + std::unordered_map &_node_to_tensor; + std::unordered_map _graph_to_runtime_graph; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h new file mode 100644 index 0000000..9ea8b1f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H +#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H + +#include "luci_interpreter/core/Tensor.h" + +#include + +#include + +namespace luci_interpreter +{ + +// Maps runtime entities back to IR entities. 
It is used to implement observing functionality. +struct RuntimeToIR +{ + std::unordered_map tensor_to_node; + std::unordered_map kernel_to_node; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp new file mode 100644 index 0000000..501e847 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Add.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleAdd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + AddParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp new file mode 100644 index 0000000..f3ca557 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ArgMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleArgMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->dimension()); + Tensor *output = helper.getOutputTensor(node); + + ArgMaxParams params{}; + params.output_type = node->output_type(); + + return std::make_unique(input, axis, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp new file mode 100644 index 0000000..a813570 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/AveragePool2D.h" +#include + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleAveragePool2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique(input, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp new file mode 100644 index 0000000..9da2f6d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchMatMul.h" +#include + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *lhs = helper.getInputTensor(node->x()); + const Tensor *rhs = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + auto lhs_scratchpad = + std::make_unique(lhs->element_type(), Shape({}), AffineQuantization{}, ""); + lhs_scratchpad->set_observable(false); + lhs_scratchpad->set_data_buffer(nullptr); + auto rhs_scratchpad = + std::make_unique(rhs->element_type(), Shape({}), AffineQuantization{}, ""); + rhs_scratchpad->set_observable(false); + rhs_scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current BatchMatMul temporary was found. + if (execution_plan.offsets().size() > 1) + { + assert(execution_plan.offsets().size() == 3); + + // If this is true, then we keep this offset in scratchpad. 
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1)); + rhs_scratchpad->set_offset(execution_plan.offsets().at(2)); + } + } + Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad)); + Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad)); + + BatchMatMulParams params; + params.adj_x = node->adj_x(); + params.adj_y = node->adj_y(); + + return std::make_unique(lhs, rhs, output, lhs_tmp, rhs_tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp new file mode 100644 index 0000000..ac6ebb3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchToSpaceND.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *crops = helper.getInputTensor(node->crops()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, block_shape, crops, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h new file mode 100644 index 0000000..eab2840 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H +#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H + +#include "loader/KernelBuilderHelper.h" + +#include "luci/IR/CircleNodes.h" + +namespace luci_interpreter +{ + +#define REGISTER_KERNEL(name) \ + std::unique_ptr build_kernel_Circle##name(const luci::CircleNode *circle_node, \ + KernelBuilderHelper &helper); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp new file mode 100644 index 0000000..a16354c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Cast.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleCast(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp new file mode 100644 index 0000000..ba2564e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Concatenation.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + std::vector inputs(node->numValues()); + for (uint32_t i = 0; i < node->numValues(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + ConcatenationParams params{}; + params.axis = node->axis(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp new file mode 100644 index 0000000..218165e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Conv2D.h" +#include + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + Conv2DParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp new file mode 100644 index 0000000..1749463 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthToSpace.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + DepthToSpaceParams params{}; + params.block_size = node->block_size(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp new file mode 100644 index 0000000..8af1e3b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthwiseConv2D.h" +#include + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + DepthwiseConv2DParams params{}; + params.padding = node->padding(); + params.depth_multiplier = node->depthMultiplier(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleDepthwiseConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp new file mode 100644 index 0000000..787322e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Dequantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleDequantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp new file mode 100644 index 0000000..0611dfd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Div.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + DivParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp new file mode 100644 index 0000000..a79985e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Elu.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleElu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp new file mode 100644 index 0000000..5969288 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Equal.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) + +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp new file mode 100644 index 0000000..30d11cb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Exp.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleExp(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp new file mode 100644 index 0000000..9840c34 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ExpandDims.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleExpandDims(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, axis, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp new file mode 100644 index 0000000..3aefdf1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Fill.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleFill(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const auto dims = helper.getInputTensor(node->dims()); + const auto value = helper.getInputTensor(node->value()); + auto output = helper.getOutputTensor(node); + + return std::make_unique(dims, value, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp new file mode 100644 index 0000000..e0a2231 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Floor.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleFloor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp new file mode 100644 index 0000000..a45d89e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FloorDiv.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp new file mode 100644 index 0000000..b7b742b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FullyConnected.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *weights = helper.getInputTensor(node->weights()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + FullyConnectedParams params{}; + params.activation = node->fusedActivationFunction(); + params.keep_num_dims = node->keep_num_dims(); + + return std::make_unique(input, weights, bias, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp new file mode 100644 index 0000000..2ee2906 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Gather.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleGather(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *params = helper.getInputTensor(node->params()); + const Tensor *indices = helper.getInputTensor(node->indices()); + Tensor *output = helper.getOutputTensor(node); + + GatherParams gparams{}; + gparams.axis = node->axis(); + // TODO support batch_dims + gparams.batch_dims = 0; + + return std::make_unique(params, indices, output, gparams); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp new file mode 100644 index 0000000..80aa63c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Greater.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleGreater(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp new file mode 100644 index 0000000..272f284 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/GreaterEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp new file mode 100644 index 0000000..3ac7d49 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/If.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleIf(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + auto output_nodes = collectOutputNodes(node); + assert(node->arity() == 1 + node->input_count()); + assert(output_nodes.size() == static_cast(node->output_count())); + + const Tensor *cond = helper.getInputTensor(node->cond()); + std::vector inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph()); + RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph()); + + return std::make_unique(cond, std::move(inputs), std::move(outputs), then_graph, + else_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp new file mode 100644 index 0000000..06031e5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/InstanceNorm.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *gamma = helper.getInputTensor(node->gamma()); + const Tensor *beta = helper.getInputTensor(node->beta()); + + Tensor *output = helper.getOutputTensor(node); + + InstanceNormParams params{}; + params.epsilon = node->epsilon(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input, gamma, beta, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp new file mode 100644 index 0000000..6e22e6d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Normalize.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + L2NormParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp new file mode 100644 index 0000000..95b5589 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Pool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp new file mode 100644 index 0000000..bbf5067 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LeakyRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + LeakyReluParams params{}; + params.alpha = node->alpha(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp new file mode 100644 index 0000000..ae914ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Less.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLess(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp new file mode 100644 index 0000000..f1b424b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LessEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp new file mode 100644 index 0000000..962ca2d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LocalResponseNormalization.h" + +namespace luci_interpreter +{ + +std::unique_ptr +build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + LocalResponseNormalizationParams params{}; + params.radius = node->radius(); + params.bias = node->bias(); + params.alpha = node->alpha(); + params.beta = node->beta(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp new file mode 100644 index 0000000..4322041 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogSoftmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp new file mode 100644 index 0000000..bf3cb67 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalAnd.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp new file mode 100644 index 0000000..fefcd9a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalNot.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp new file mode 100644 index 0000000..a416cb4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalOr.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp new file mode 100644 index 0000000..4a69dee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Logistic.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleLogistic(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp new file mode 100644 index 0000000..f66a206 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/MaxPool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp new file mode 100644 index 0000000..d0bff77 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Maximum.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMaximum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp new file mode 100644 index 0000000..0dec63e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Mean.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMean(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + auto temp_sum_unique = + std::make_unique(input->element_type(), Shape({}), AffineQuantization{}, ""); + temp_sum_unique->set_observable(false); + temp_sum_unique->set_data_buffer(nullptr); + Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique(input, axes, output, temp_index, resolved_axes, temp_sum, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp new file mode 100644 index 0000000..1a49c10 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Minimum.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMinimum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp new file mode 100644 index 0000000..b221b45 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/MirrorPad.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + MirrorPadParams params{}; + params.mode = node->mode(); + + return std::make_unique(input, paddings, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp new file mode 100644 index 0000000..f998485 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Mul.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + MulParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp new file mode 100644 index 0000000..9a9ecf9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Neg.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleNeg(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp new file mode 100644 index 0000000..3916a58 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/NotEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleNotEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp new file mode 100644 index 0000000..a401609 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/OneHot.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleOneHot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 4); + + const Tensor *indices = helper.getInputTensor(node->indices()); + const Tensor *depth = helper.getInputTensor(node->depth()); + const Tensor *on_value = helper.getInputTensor(node->on_value()); + const Tensor *off_value = helper.getInputTensor(node->off_value()); + Tensor *output = helper.getOutputTensor(node); + + OneHotParams params{}; + params.axis = node->axis(); + + return std::make_unique(indices, depth, on_value, off_value, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp new file mode 100644 index 0000000..f3d700c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CirclePRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *alpha = helper.getInputTensor(node->alpha()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, alpha, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp new file mode 100644 index 0000000..efc5850 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pack.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CirclePack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == node->values_count()); + + std::vector inputs(node->values_count()); + for (uint32_t i = 0; i < node->values_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + PackParams params{}; + params.axis = node->axis(); + params.values_count = node->values_count(); + + return std::make_unique(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp new file mode 100644 index 0000000..67ce997 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pad.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CirclePad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp new file mode 100644 index 0000000..e378a97 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PadV2.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CirclePadV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + const Tensor *constant_values = helper.getInputTensor(node->constant_values()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, paddings, constant_values, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp new file mode 100644 index 0000000..d32fc3d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pow.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CirclePow(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp new file mode 100644 index 0000000..cb36fb6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Quantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleQuantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp new file mode 100644 index 0000000..1d64c1c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Relu.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp new file mode 100644 index 0000000..e50cd25 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Relu6.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleRelu6(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp new file mode 100644 index 0000000..76ddd88 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Reshape.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleReshape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *shape = helper.getInputTensor(node->shape()); + Tensor *output = helper.getOutputTensor(node); + + // NOTE 'newShape' attribute is ignored. 
+ return std::make_unique(input, shape, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp new file mode 100644 index 0000000..dc2b88a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeBilinear.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeBilinearParams params{}; + params.align_corners = node->align_corners(); + params.half_pixel_centers = node->half_pixel_centers(); + + return std::make_unique(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp new file mode 100644 index 0000000..c7058ae --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeNearestNeighbor.h" + +namespace luci_interpreter +{ + +std::unique_ptr +build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeNearestNeighborParams params{}; + params.align_corners = node->align_corners(); + // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated + // Current CircleResizeNearestNeighbor don't have half_pixel_centers. + // default value on current is false. + // it need to be updated when CircleResizeNearestNeighbor updated. + params.half_pixel_centers = false; + + return std::make_unique(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp new file mode 100644 index 0000000..c1a7f53 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ReverseV2.h" + +namespace luci_interpreter +{ + +/* Builds the ReverseV2 kernel for a Circle node: asserts the node has 2 inputs, then looks up the tensor for node->tensor() as the input, the tensor for node->axis() as the axes, and the node's output tensor. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleReverseV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *axes = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, axes, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp new file mode 100644 index 0000000..0714a5d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Rsqrt.h" + +namespace luci_interpreter +{ + +/* Builds the Rsqrt kernel for a Circle node: asserts the node has 1 input, then looks up the tensor for node->x() and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleRsqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp new file mode 100644 index 0000000..d172ef4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SVDF.h" + +namespace luci_interpreter +{ + +/* Builds the SVDF kernel for a Circle node. Asserts the node has 5 inputs; input, weight_feature, weight_time and input_activation_state are required tensors, bias is looked up as an optional tensor. Seven scratchpad tensors (tmp, tmp_1 .. tmp_6) are created with an empty shape, marked non-observable, given a null data buffer and added to the runtime graph of node->graph(). Element types: tmp uses the type of input_activation_state; tmp_1 is S32 when the input is S8, otherwise FLOAT32; tmp_2 uses the same type as tmp_1 unless that is FLOAT32 and the feature weights are S8 or U8, in which case it takes the feature type; tmp_3 .. tmp_6 are FLOAT32. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSVDF(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 5); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *feature = helper.getInputTensor(node->weight_feature()); + const Tensor *time = helper.getInputTensor(node->weight_time()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state()); + Tensor *output = helper.getOutputTensor(node); + + auto scratchpad_tensor = std::make_unique(input_activation_state->element_type(), + Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + DataType data_type = input->element_type() == DataType::S8 ? 
DataType::S32 : DataType::FLOAT32; + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + if (data_type == DataType::FLOAT32 && + (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8)) + { + data_type = feature->element_type(); + } + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + data_type = DataType::FLOAT32; + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + SVDFParams params{}; + params.activation = 
node->fusedActivationFunction(); + params.svdf_rank = node->svdf_rank(); + params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs(); + + return std::make_unique(input, feature, time, bias, input_activation_state, output, + tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp new file mode 100644 index 0000000..d1edbc7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Shape.h" + +namespace luci_interpreter +{ + +/* Builds the Shape kernel for a Circle node: asserts the node has 1 input, looks up the input and output tensors through the helper, and copies the node's out_type into ShapeParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleShape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const auto input = helper.getInputTensor(node->input()); + auto output = helper.getOutputTensor(node); + + ShapeParams shape_params{}; + shape_params.out_type = node->out_type(); + + return std::make_unique(input, output, shape_params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp new file mode 100644 index 0000000..60ac641 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Slice.h" + +namespace luci_interpreter +{ + +/* Builds the Slice kernel for a Circle node: asserts the node has 3 inputs, then looks up the input, begin and size tensors and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *size = helper.getInputTensor(node->size()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, begin, size, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp new file mode 100644 index 0000000..f41f63f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Softmax.h" + +namespace luci_interpreter +{ + +/* Builds the Softmax kernel for a Circle node: asserts the node has 1 input, looks up the tensor for node->logits() and the node's output tensor, and copies the node's beta into SoftmaxParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + SoftmaxParams params{}; + params.beta = node->beta(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp new file mode 100644 index 0000000..b6e6cf5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToBatchND.h" + +namespace luci_interpreter +{ + +/* Builds the SpaceToBatchND kernel for a Circle node: asserts the node has 3 inputs, then looks up the input, block_shape and paddings tensors and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, block_shape, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp new file mode 100644 index 0000000..63fdb95 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToDepth.h" + +namespace luci_interpreter +{ + +/* Builds the SpaceToDepth kernel for a Circle node: asserts the node has 1 input, looks up the input and output tensors through the helper, and copies the node's block_size into SpaceToDepthParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + + Tensor *output = helper.getOutputTensor(node); + + SpaceToDepthParams params{}; + params.block_size = node->block_size(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp new file mode 100644 index 0000000..3f6d4a7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Split.h" + +namespace luci_interpreter +{ + +/* Builds the Split kernel for a Circle node: collects the node's output nodes, asserts the node has 2 inputs and that the number of output nodes equals num_split, then looks up the split_dim tensor as the axis, the input tensor and the output tensors through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, static_cast, std::vector, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSplit(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + auto output_nodes = collectOutputNodes(node); + assert(node->arity() == 2); + assert(output_nodes.size() == static_cast(node->num_split())); + + const Tensor *axis = helper.getInputTensor(node->split_dim()); + const Tensor *input = helper.getInputTensor(node->input()); + std::vector outputs = helper.getOutputTensors(output_nodes); + + // NOTE The 'num_split' attribute is only checked by the assert above; it is not passed to the kernel. + return std::make_unique(axis, input, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp new file mode 100644 index 0000000..0788822 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SplitV.h" + +namespace luci_interpreter +{ + +/* Builds the SplitV kernel for a Circle node: collects the node's output nodes, asserts the node has 3 inputs and that the number of output nodes equals num_split, then looks up the input, size_splits and split_dim tensors and the output tensors through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, static_cast, std::vector, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSplitV(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + auto output_nodes = collectOutputNodes(node); + assert(node->arity() == 3); + assert(output_nodes.size() == static_cast(node->num_split())); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *sizes_data = helper.getInputTensor(node->size_splits()); + const Tensor *axis = helper.getInputTensor(node->split_dim()); + std::vector outputs = helper.getOutputTensors(output_nodes); + + // NOTE The 'num_split' attribute is only checked by the assert above; it is not passed to the kernel. + return std::make_unique(input, sizes_data, axis, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp new file mode 100644 index 0000000..b9843fe --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sqrt.h" + +namespace luci_interpreter +{ + +/* Builds the Sqrt kernel for a Circle node: asserts the node has 1 input, then looks up the tensor for node->x() and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp new file mode 100644 index 0000000..0ad7c17 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Square.h" + +namespace luci_interpreter +{ + +/* Builds the Square kernel for a Circle node: asserts the node has 1 input, then looks up the tensor for node->x() and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSquare(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp new file mode 100644 index 0000000..e4c6fd8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SquaredDifference.h" + +namespace luci_interpreter +{ + +/* Builds the SquaredDifference kernel for a Circle node: asserts the node has 2 inputs, then looks up the tensors for node->x() and node->y() and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp new file mode 100644 index 0000000..6885f80 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Squeeze.h" + +namespace luci_interpreter +{ + +/* Builds the Squeeze kernel for a Circle node: asserts the node has 1 input, looks up the input and output tensors through the helper, and copies the node's squeeze_dims into SqueezeParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + SqueezeParams params{}; + params.squeeze_dims = node->squeeze_dims(); + + return std::make_unique(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp new file mode 100644 index 0000000..359b4e3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/StridedSlice.h" + +namespace luci_interpreter +{ + +/* Builds the StridedSlice kernel for a Circle node: asserts the node has 4 inputs, looks up the input, begin, end and strides tensors and the node's output tensor, and copies the five mask attributes (begin, ellipsis, end, new_axis, shrink_axis) from the node into StridedSliceParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 4); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *end = helper.getInputTensor(node->end()); + const Tensor *strides = helper.getInputTensor(node->strides()); + + Tensor *output = helper.getOutputTensor(node); + + StridedSliceParams params{}; + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + return std::make_unique(input, begin, end, strides, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp new file mode 100644 index 0000000..a6252cb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sub.h" + +namespace luci_interpreter +{ + +/* Builds the Sub kernel for a Circle node: asserts the node has 2 inputs, looks up the tensors for node->x() and node->y() and the node's output tensor, and copies the node's fused activation function into SubParams. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleSub(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + SubParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp new file mode 100644 index 0000000..a58ef60 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Tanh.h" + +namespace luci_interpreter +{ + +/* Builds the Tanh kernel for a Circle node: asserts the node has 1 input, then looks up the tensor for node->x() and the node's output tensor through the helper. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleTanh(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp new file mode 100644 index 0000000..ea17d83 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Transpose.h" + +namespace luci_interpreter +{ + +/* Builds the Transpose kernel for a Circle node: asserts the node has 2 inputs, then looks up the tensor for node->a() as the input, the tensor for node->perm() as the permutation, and the node's output tensor. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleTranspose(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->a()); + const Tensor *perm = helper.getInputTensor(node->perm()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique(input, perm, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp new file mode 100644 index 0000000..d773e30 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/TransposeConv.h" + +namespace luci_interpreter +{ + +/* Builds the TransposeConv kernel for a Circle node. Asserts the node has 4 inputs; inputSizes, filter and outBackprop are required tensors, bias is looked up as an optional tensor. One scratch tensor is created with an empty shape, marked non-observable, given a null data buffer and added to the runtime graph of node->graph(); its element type is S64 when the tensor looked up for the node is S16, otherwise S32. Padding and the stride height and width are copied from the node into TransposeConvParams. NOTE(review): the scratch type is chosen from the tensor looked up for the node itself rather than from one of its inputs; confirm this is intended. NOTE(review): template arguments (std::unique_ptr, loco::must_cast, std::make_unique) appear to have been stripped from this dump; confirm against the upstream file. */ std::unique_ptr build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + assert(node->arity() == 4); + + const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *out_backprop = helper.getInputTensor(node->outBackprop()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + + Tensor *output = helper.getOutputTensor(node); + + DataType scratch_data_type = + helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + + auto scratch_tensor = + std::make_unique(scratch_data_type, Shape({}), AffineQuantization{}, ""); + scratch_tensor->set_observable(false); + scratch_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor)); + + TransposeConvParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + + return std::make_unique(input_sizes, filter, out_backprop, bias, output, + tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp new file mode 100644 index 0000000..a1c0d32 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Unpack.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleUnpack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + auto output_nodes = collectOutputNodes(node); + assert(node->arity() == 1); + assert(output_nodes.size() == static_cast(node->num())); + + const Tensor *input = helper.getInputTensor(node->value()); + std::vector outputs = helper.getOutputTensors(output_nodes); + + UnpackParams params{}; + params.axis = node->axis(); + + // NOTE 'num' attribute is ignored. + return std::make_unique(input, std::move(outputs), params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp new file mode 100644 index 0000000..8fde6ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/While.h" + +namespace luci_interpreter +{ + +std::unique_ptr build_kernel_CircleWhile(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast(circle_node); + + auto output_nodes = collectOutputNodes(node); + assert(node->arity() == node->input_count()); + assert(output_nodes.size() == static_cast(node->output_count())); + + std::vector inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph()); + RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph()); + + return std::make_unique(std::move(inputs), std::move(outputs), cond_graph, + body_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt index 7953359..d304826 100644 --- a/compiler/luci-micro/standalone/CMakeLists.txt +++ b/compiler/luci-micro/standalone/CMakeLists.txt @@ -7,6 +7,9 @@ set(BUILD_WHITELIST "dummy") add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc) set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler") +nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET) + +include_directories(${FlatBuffersSource_DIR}/include) add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco) add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor) @@ -14,7 +17,21 @@ add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops) add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str) add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo 
${CMAKE_CURRENT_BINARY_DIR}/logo) add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04) + add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv) add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp) -add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter) diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt index 034fe52..3489f1e 100644 --- a/compiler/luci-pass-value-test/CMakeLists.txt +++ b/compiler/luci-pass-value-test/CMakeLists.txt @@ -17,6 +17,13 @@ macro(addeval RECIPE PASS_OPTION) 
set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}") set(DASH_PASS_OPTION "--${PASS_OPTION}") + foreach(MORE_OPTIONS ${ARGN}) + list(APPEND DASH_PASS_OPTION "--${MORE_OPTIONS}") + endforeach() + # NOTE if there are two options, 'DASH_PASS_OPTION' will be like '--option_a;--option_b' + # add_custom_command() will translate ';' to two arguments as '--optiona_a --optionb' + # do not use set(DASH_PASS_OPTION "${DASH_PASS_OPTION} --${ARG}")) + # as this will become like '"--optiona_a --optionb"' which is one string argument # Generate optimized .circle add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH} diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst index 67476c6..cdff159 100644 --- a/compiler/luci-pass-value-test/test.lst +++ b/compiler/luci-pass-value-test/test.lst @@ -14,6 +14,8 @@ addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv) addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6) addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6) addeval(Net_Conv_Relu6_000 fuse_activation_function) +addeval(Net_Densify_Add_000 fold_densify) +addeval(Net_Dequantize_Add_000 fold_dequantize) addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv) addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv) addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop) @@ -25,10 +27,17 @@ addeval(Net_TConv_Add_002 fuse_add_with_tconv) addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv) addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv) addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv) +addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv) +addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv) addeval(Net_InstanceNorm_001 fuse_instnorm) addeval(Net_InstanceNorm_002 fuse_instnorm) addeval(Net_InstanceNorm_003 fuse_instnorm) addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice) +addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul) + +# test for limited support for FLOAT16 
+addeval(Net_Dequantize_Add_000 fold_dequantize) +addeval(Net_Densify_Dequantize_Add_000 fold_dequantize fold_densify) # test SignatureDef, with any optimization #addeval(SignatureDef_MultiOut_000 fuse_instnorm) diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst index f62b729..932da95 100644 --- a/compiler/luci-value-test/test.lst +++ b/compiler/luci-value-test/test.lst @@ -161,6 +161,8 @@ addeval(Squeeze_001) addeval(StridedSlice_000) addeval(StridedSlice_001) addeval(StridedSlice_002) +addeval(StridedSlice_003) +addeval(StridedSlice_004) addeval(Sub_000) addeval(Sub_U8_000) #addeval(Sum_000) diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h index 0ff21a3..7516197 100644 --- a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h +++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h @@ -118,6 +118,10 @@ public: return circle::CreateCosOptions(_builder).Union(); } flatbuffers::Offset visit(luci::CircleCustom *) { return _no_option; } + flatbuffers::Offset visit(luci::CircleDensify *) + { + return circle::CreateDensifyOptions(_builder).Union(); + } flatbuffers::Offset visit(luci::CircleDepthToSpace *node) { return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union(); diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst index 1b69093..8a75ef7 100644 --- a/compiler/luci/export/src/CircleOps.lst +++ b/compiler/luci/export/src/CircleOps.lst @@ -32,6 +32,7 @@ CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_C CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions) CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions) CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE) +CIRCLE_NODE(CircleDensify, BuiltinOperator_DENSIFY, BuiltinOptions_DensifyOptions) CIRCLE_NODE(CircleDepthToSpace, 
BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions) CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions) CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions) diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index b3bb850..97e8107 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -434,6 +434,12 @@ flatbuffers::Offset encodeOpBuffer(FlatBufferBuilder &builder, l break; } + // NOTE loco::DataType::FLOAT16 is added but we do not export this type + // as backends currently don't support this type. + // currently this is supported only for "Tensor(Float16) - Dequantize" + // sequence so that after 'fold_dequantize' option this Tensor is + // converted to FLOAT32. + INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype())); } diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 1b2db23..bc0a00b 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -18,6 +18,7 @@ target_link_libraries(luci_import PRIVATE luci_log) target_link_libraries(luci_import PRIVATE luci_logex) target_link_libraries(luci_import PRIVATE nncc_common) target_link_libraries(luci_import PRIVATE locop) +target_link_libraries(luci_import PRIVATE foder) target_link_libraries(luci_import PRIVATE oops) target_link_libraries(luci_import PRIVATE mio_circle04_helper) install(TARGETS luci_import DESTINATION lib) diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h index 7a5045e..a4a6d7c 100644 --- a/compiler/luci/import/include/luci/Import/Nodes.h +++ b/compiler/luci/import/include/luci/Import/Nodes.h @@ -35,6 +35,7 @@ #include "Nodes/CircleConv2D.h" #include "Nodes/CircleCos.h" #include "Nodes/CircleCustom.h" +#include 
"Nodes/CircleDensify.h" #include "Nodes/CircleDepthToSpace.h" #include "Nodes/CircleDepthwiseConv2D.h" #include "Nodes/CircleDequantize.h" diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h new file mode 100644 index 0000000..42bdac1 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ +#define __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ + +#include "luci/Import/GraphBuilder.h" + +namespace luci +{ + +class CircleDensifyGraphBuilder : public GraphBuilder +{ +public: + bool validate(const ValidateArgs &args) const final; + +private: + CircleNode *build_node(const circle::OperatorT &op, const std::vector &inputs, + loco::Graph *graph) const final; +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ diff --git a/compiler/luci/import/include/luci/ImporterEx.h b/compiler/luci/import/include/luci/ImporterEx.h new file mode 100644 index 0000000..852d4c8 --- /dev/null +++ b/compiler/luci/import/include/luci/ImporterEx.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORTER_EX_H__ +#define __LUCI_IMPORTER_EX_H__ + +#include "luci/IR/Module.h" + +#include +#include + +namespace luci +{ + +class ImporterEx final +{ +public: + ImporterEx() = default; + +public: + std::unique_ptr importVerifyModule(const std::string &input_path) const; +}; + +} // namespace luci + +#endif // __LUCI_IMPORTER_EX_H__ diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp index fe2d830..d3b52aa 100644 --- a/compiler/luci/import/src/GraphBuilderRegistry.cpp +++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp @@ -44,6 +44,7 @@ GraphBuilderRegistry::GraphBuilderRegistry() CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2 CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3 CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108 + CIRCLE_NODE(DENSIFY, CircleDensifyGraphBuilder); // 124 CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceGraphBuilder); // 5 CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4 CIRCLE_NODE(DEQUANTIZE, CircleDequantizeGraphBuilder); // 6 @@ -160,7 +161,6 @@ GraphBuilderRegistry::GraphBuilderRegistry() // BuiltinOperator_DELEGATE = 51, // BuiltinOperator_ARG_MAX = 56, // BuiltinOperator_HARD_SWISH = 117, - // BuiltinOperator_DENSIFY = 124, // Register builders for nodes which not handles in builders registered above. 
#define CIRCLE_NODE(CLASS) add(std::make_unique()) diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp new file mode 100644 index 0000000..db585fd --- /dev/null +++ b/compiler/luci/import/src/ImporterEx.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Importer.h" +#include "luci/ImporterEx.h" + +#include + +#include +#include + +namespace luci +{ + +std::unique_ptr ImporterEx::importVerifyModule(const std::string &input_path) const +{ + foder::FileLoader file_loader{input_path}; + std::vector model_data; + + try + { + model_data = file_loader.load(); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + return nullptr; + } + + flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + return nullptr; + } + + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; + return nullptr; + } + + Importer importer; + return importer.importModule(circle_model); +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp 
b/compiler/luci/import/src/Nodes/CircleConst.cpp index a4f190d..88f2ae3 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -166,6 +166,10 @@ CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index, copy_data(buffer, num_elements, const_node); break; + case loco::DataType::FLOAT16: + copy_data(buffer, num_elements, const_node); + break; + case loco::DataType::U8: copy_data(buffer, num_elements, const_node); break; diff --git a/compiler/luci/import/src/Nodes/CircleDensify.cpp b/compiler/luci/import/src/Nodes/CircleDensify.cpp new file mode 100644 index 0000000..0a4b218 --- /dev/null +++ b/compiler/luci/import/src/Nodes/CircleDensify.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Import/Nodes/CircleDensify.h" + +#include + +#include + +namespace luci +{ + +bool CircleDensifyGraphBuilder::validate(const ValidateArgs &args) const +{ + return GraphBuilder::validate(args, 1); +} + +CircleNode *CircleDensifyGraphBuilder::build_node(const circle::OperatorT &, + const std::vector &inputs, + loco::Graph *graph) const +{ + auto *node = graph->nodes()->create(); + node->input(inputs.at(0)); + + // No options for Densify + + return node; +} + +} // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h index d89ea03..901f1cb 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.h +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h @@ -32,6 +32,7 @@ #include "Nodes/CircleConv2D.h" #include "Nodes/CircleCos.h" #include "Nodes/CircleCustom.h" +#include "Nodes/CircleDensify.h" #include "Nodes/CircleDepthToSpace.h" #include "Nodes/CircleDepthwiseConv2D.h" #include "Nodes/CircleDequantize.h" diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst index 1472008..f227a03 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst @@ -30,6 +30,7 @@ CIRCLE_NODE(CONCATENATION, CircleConcatenation) CIRCLE_NODE(CONV_2D, CircleConv2D) CIRCLE_NODE(COS, CircleCos) CIRCLE_NODE(CUSTOM, CircleCustom) +CIRCLE_NODE(DENSIFY, CircleDensify) CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpace) CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2D) CIRCLE_NODE(DEQUANTIZE, CircleDequantize) diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h new file mode 100644 index 0000000..7acad03 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_CIRCLE_DENSIFY_H__ +#define __LUCI_IR_CIRCLE_DENSIFY_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/CircleNodeMixins.h" + +namespace luci +{ + +/** + * @brief DENSIFY in Circle + */ +class CircleDensify final : public FixedArityNode<1, CircleNodeImpl> +{ +public: + loco::Node *input(void) const { return at(0)->node(); } + void input(loco::Node *node) { at(0)->node(node); } +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_DENSIFY_H__ diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp index c2d82c8..a4854ec 100644 --- a/compiler/luci/lang/src/Nodes/CircleConst.cpp +++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp @@ -77,6 +77,7 @@ INSTANTIATE(loco::DataType::S8); INSTANTIATE(loco::DataType::FLOAT32); INSTANTIATE(loco::DataType::U8); INSTANTIATE(loco::DataType::BOOL); +INSTANTIATE(loco::DataType::FLOAT16); #undef INSTANTIATE diff --git a/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 0000000..ae83784 --- /dev/null +++ b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/Nodes/CircleDensify.h" + +#include "luci/IR/CircleDialect.h" +#include "luci/IR/CircleNodeVisitor.h" + +#include + +TEST(CircleDensifyTest, constructor) +{ + luci::CircleDensify densify_node; + + ASSERT_EQ(luci::CircleDialect::get(), densify_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::DENSIFY, densify_node.opcode()); + + ASSERT_EQ(nullptr, densify_node.input()); +} + +TEST(CircleDensifyTest, input_NEG) +{ + luci::CircleDensify densify_node; + luci::CircleDensify node; + + densify_node.input(&node); + ASSERT_NE(nullptr, densify_node.input()); + + densify_node.input(nullptr); + ASSERT_EQ(nullptr, densify_node.input()); +} + +TEST(CircleDensifyTest, arity_NEG) +{ + luci::CircleDensify densify_node; + + ASSERT_NO_THROW(densify_node.arg(0)); + ASSERT_THROW(densify_node.arg(1), std::out_of_range); +} + +TEST(CircleDensifyTest, visit_mutable_NEG) +{ + struct TestVisitor final : public luci::CircleNodeMutableVisitor + { + }; + + luci::CircleDensify densify_node; + + TestVisitor tv; + ASSERT_THROW(densify_node.accept(&tv), std::exception); +} + +TEST(CircleDensifyTest, visit_NEG) +{ + struct TestVisitor final : public luci::CircleNodeVisitor + { + }; + + luci::CircleDensify densify_node; + + TestVisitor tv; + ASSERT_THROW(densify_node.accept(&tv), std::exception); +} diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp 
b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp index eff0830..8409f25 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp @@ -137,6 +137,7 @@ CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node) CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder) CIRCLE_NODE(COS, CircleCosSummaryBuilder) CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder) + CIRCLE_NODE(DENSIFY, CircleDensifySummaryBuilder) CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder) CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder) CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder) diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp index 6df9270..48e4579 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp @@ -374,6 +374,22 @@ void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction())); } +void CircleConstSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto circonst = loco::must_cast(node); + s.args().append("dtype", to_str(circonst->dtype())); + s.args().append("rank", std::to_string(circonst->rank())); + std::string shape; + for (uint32_t r = 0; r < circonst->rank(); ++r) + { + if (!shape.empty()) + shape += " "; + shape += std::to_string(circonst->dim(r).value()); + } + s.args().append("shape", "[" + shape + "]"); +} + void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s) { s.state(locop::NodeDesc::State::PartiallyKnown); diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h index 6cd24b7..f0cac4e 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h +++ 
b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h @@ -167,6 +167,7 @@ private: class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder { private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); void update_status(locop::NodeSummary &s); }; @@ -189,6 +190,10 @@ private: void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); }; +class CircleDensifySummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder { private: diff --git a/compiler/luci/partition/include/luci/ConnectNode.h b/compiler/luci/partition/include/luci/ConnectNode.h new file mode 100644 index 0000000..2d9d41d --- /dev/null +++ b/compiler/luci/partition/include/luci/ConnectNode.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PARTITION_CONNECT_NODE_H__ +#define __LUCI_PARTITION_CONNECT_NODE_H__ + +#include +#include + +namespace luci +{ + +/** + * @note MapNode2Clone is used as a map from original node to cloned node + * to find input of a cloned node + * + * (Original) (Clone) + * + * [A] [A'] + * | [B] | [B'] + * | | | | + * \ / \ / + * [C] [C'] + * + * From view of [C'] we need to find [A'] and [B']. 
We know [C] from [C'], + * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B'] + * from the map. + */ +using MapNode2Clone = std::map; + +struct CloneContext +{ + std::pair emplace(const CircleNode *org, CircleNode *clone) + { + return node2clone.emplace(org, clone); + } + MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); } + MapNode2Clone::iterator end(void) { return node2clone.end(); } + + MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); } + MapNode2Clone::const_iterator end(void) const { return node2clone.end(); } + + MapNode2Clone node2clone; +}; + +class ConnectNode final : public luci::CircleNodeVisitor +{ +public: + ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){}; + +public: + void visit(const luci::CircleAbs *) final; + void visit(const luci::CircleAdd *) final; + void visit(const luci::CircleAddN *) final; + void visit(const luci::CircleArgMax *) final; + void visit(const luci::CircleArgMin *) final; + void visit(const luci::CircleAveragePool2D *) final; + void visit(const luci::CircleBatchMatMul *) final; + void visit(const luci::CircleBatchToSpaceND *) final; + void visit(const luci::CircleCast *) final; + void visit(const luci::CircleCeil *) final; + void visit(const luci::CircleConcatenation *) final; + void visit(const luci::CircleConst *) final; + void visit(const luci::CircleConv2D *) final; + void visit(const luci::CircleCos *) final; + void visit(const luci::CircleCustom *) final; + void visit(const luci::CircleDensify *) final; + void visit(const luci::CircleDepthToSpace *) final; + void visit(const luci::CircleDepthwiseConv2D *) final; + void visit(const luci::CircleDequantize *) final; + void visit(const luci::CircleDiv *) final; + void visit(const luci::CircleElu *) final; + void visit(const luci::CircleEqual *) final; + void visit(const luci::CircleExp *) final; + void visit(const luci::CircleExpandDims *) final; 
+ void visit(const luci::CircleFakeQuant *) final; + void visit(const luci::CircleFill *) final; + void visit(const luci::CircleFloor *) final; + void visit(const luci::CircleFloorDiv *) final; + void visit(const luci::CircleFloorMod *) final; + void visit(const luci::CircleFullyConnected *) final; + void visit(const luci::CircleGather *) final; + void visit(const luci::CircleGatherNd *) final; + void visit(const luci::CircleGreater *) final; + void visit(const luci::CircleGreaterEqual *) final; + void visit(const luci::CircleIf *) final; + void visit(const luci::CircleL2Normalize *) final; + void visit(const luci::CircleL2Pool2D *) final; + void visit(const luci::CircleLeakyRelu *) final; + void visit(const luci::CircleLess *) final; + void visit(const luci::CircleLessEqual *) final; + void visit(const luci::CircleLocalResponseNormalization *) final; + void visit(const luci::CircleLog *) final; + void visit(const luci::CircleLogicalAnd *) final; + void visit(const luci::CircleLogicalNot *) final; + void visit(const luci::CircleLogicalOr *) final; + void visit(const luci::CircleLogistic *) final; + void visit(const luci::CircleLogSoftmax *) final; + void visit(const luci::CircleMatrixDiag *) final; + void visit(const luci::CircleMatrixSetDiag *) final; + void visit(const luci::CircleMaximum *) final; + void visit(const luci::CircleMaxPool2D *) final; + void visit(const luci::CircleMean *) final; + void visit(const luci::CircleMinimum *) final; + void visit(const luci::CircleMirrorPad *) final; + void visit(const luci::CircleMul *) final; + void visit(const luci::CircleNeg *) final; + void visit(const luci::CircleNonMaxSuppressionV4 *) final; + void visit(const luci::CircleNonMaxSuppressionV5 *) final; + void visit(const luci::CircleNotEqual *) final; + void visit(const luci::CircleOneHot *) final; + void visit(const luci::CirclePack *) final; + void visit(const luci::CirclePad *) final; + void visit(const luci::CirclePadV2 *) final; + void visit(const 
luci::CirclePow *) final; + void visit(const luci::CirclePRelu *) final; + void visit(const luci::CircleQuantize *) final; + void visit(const luci::CircleRange *) final; + void visit(const luci::CircleRank *) final; + void visit(const luci::CircleReduceAny *) final; + void visit(const luci::CircleReduceMax *) final; + void visit(const luci::CircleReduceMin *) final; + void visit(const luci::CircleReduceProd *) final; + void visit(const luci::CircleRelu *) final; + void visit(const luci::CircleRelu6 *) final; + void visit(const luci::CircleReluN1To1 *) final; + void visit(const luci::CircleReshape *) final; + void visit(const luci::CircleResizeBilinear *) final; + void visit(const luci::CircleResizeNearestNeighbor *) final; + void visit(const luci::CircleReverseSequence *) final; + void visit(const luci::CircleReverseV2 *) final; + void visit(const luci::CircleRound *) final; + void visit(const luci::CircleRsqrt *) final; + void visit(const luci::CircleScatterNd *) final; + void visit(const luci::CircleSegmentSum *) final; + void visit(const luci::CircleSelect *) final; + void visit(const luci::CircleSelectV2 *) final; + void visit(const luci::CircleShape *) final; + void visit(const luci::CircleSin *) final; + void visit(const luci::CircleSlice *) final; + void visit(const luci::CircleSoftmax *) final; + void visit(const luci::CircleSpaceToBatchND *) final; + void visit(const luci::CircleSpaceToDepth *) final; + void visit(const luci::CircleSparseToDense *) final; + void visit(const luci::CircleSplit *) final; + void visit(const luci::CircleSplitV *) final; + void visit(const luci::CircleSqrt *) final; + void visit(const luci::CircleSquare *) final; + void visit(const luci::CircleSquaredDifference *) final; + void visit(const luci::CircleSqueeze *) final; + void visit(const luci::CircleStridedSlice *) final; + void visit(const luci::CircleSVDF *) final; + void visit(const luci::CircleSub *) final; + void visit(const luci::CircleSum *) final; + void visit(const 
luci::CircleTanh *) final; + void visit(const luci::CircleTile *) final; + void visit(const luci::CircleTopKV2 *) final; + void visit(const luci::CircleTranspose *) final; + void visit(const luci::CircleTransposeConv *) final; + void visit(const luci::CircleUnidirectionalSequenceLSTM *) final; + void visit(const luci::CircleUnique *) final; + void visit(const luci::CircleUnpack *) final; + void visit(const luci::CircleWhere *) final; + void visit(const luci::CircleWhile *) final; + void visit(const luci::CircleZerosLike *) final; + + // Circle Only + void visit(const luci::CircleBCQFullyConnected *) final; + void visit(const luci::CircleBCQGather *) final; + void visit(const luci::CircleInstanceNorm *) final; + + // NOTE CircleInput and CircleOutput are not handled here as these need + // link with graph I/O + + // Virtual + void visit(const luci::CircleCustomOut *) final; + void visit(const luci::CircleIfOut *) final; + // void visit(const luci::CircleInput *) final; + void visit(const luci::CircleNonMaxSuppressionV4Out *) final; + void visit(const luci::CircleNonMaxSuppressionV5Out *) final; + // void visit(const luci::CircleOutput *) final; + void visit(const luci::CircleOutputDummy *) final; + void visit(const luci::CircleOutputExclude *) final; + void visit(const luci::CircleSplitOut *) final; + void visit(const luci::CircleSplitVOut *) final; + void visit(const luci::CircleTopKV2Out *) final; + void visit(const luci::CircleUniqueOut *) final; + void visit(const luci::CircleUnpackOut *) final; + void visit(const luci::CircleVariable *) final; + void visit(const luci::CircleWhileOut *) final; + +public: + luci::CircleNode *find_clone(const luci::CircleNode *node); + +protected: + luci::CloneContext &_clonecontext; +}; + +/** + * @brief Connect cloned node from input node + */ +void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext); + +} // namespace luci + +#endif // __LUCI_PARTITION_CONNECT_NODE_H__ diff --git 
a/compiler/luci/partition/src/ConnectNode.cpp b/compiler/luci/partition/src/ConnectNode.cpp index 336be7c..3d8c211 100644 --- a/compiler/luci/partition/src/ConnectNode.cpp +++ b/compiler/luci/partition/src/ConnectNode.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/src/ConnectNode.h deleted file mode 100644 index e60567c..0000000 --- a/compiler/luci/partition/src/ConnectNode.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __LUCI_PARTITION_CONNECT_NODE_H__ -#define __LUCI_PARTITION_CONNECT_NODE_H__ - -#include -#include - -namespace luci -{ - -/** - * @note MapNode2Clone is used as a map from original node to cloned node - * to find input of a cloned node - * - * (Original) (Clone) - * - * [A] [A'] - * | [B] | [B'] - * | | | | - * \ / \ / - * [C] [C'] - * - * From view of [C'] we need to find [A'] and [B']. We know [C] from [C'], - * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B'] - * from the map. 
- */ -using MapNode2Clone = std::map; - -struct CloneContext -{ - std::pair emplace(const CircleNode *org, CircleNode *clone) - { - return node2clone.emplace(org, clone); - } - MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); } - MapNode2Clone::iterator end(void) { return node2clone.end(); } - - MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); } - MapNode2Clone::const_iterator end(void) const { return node2clone.end(); } - - MapNode2Clone node2clone; -}; - -class ConnectNode final : public luci::CircleNodeVisitor -{ -public: - ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){}; - -public: - void visit(const luci::CircleAbs *) final; - void visit(const luci::CircleAdd *) final; - void visit(const luci::CircleAddN *) final; - void visit(const luci::CircleArgMax *) final; - void visit(const luci::CircleArgMin *) final; - void visit(const luci::CircleAveragePool2D *) final; - void visit(const luci::CircleBatchMatMul *) final; - void visit(const luci::CircleBatchToSpaceND *) final; - void visit(const luci::CircleCast *) final; - void visit(const luci::CircleCeil *) final; - void visit(const luci::CircleConcatenation *) final; - void visit(const luci::CircleConst *) final; - void visit(const luci::CircleConv2D *) final; - void visit(const luci::CircleCos *) final; - void visit(const luci::CircleCustom *) final; - void visit(const luci::CircleDepthToSpace *) final; - void visit(const luci::CircleDepthwiseConv2D *) final; - void visit(const luci::CircleDequantize *) final; - void visit(const luci::CircleDiv *) final; - void visit(const luci::CircleElu *) final; - void visit(const luci::CircleEqual *) final; - void visit(const luci::CircleExp *) final; - void visit(const luci::CircleExpandDims *) final; - void visit(const luci::CircleFakeQuant *) final; - void visit(const luci::CircleFill *) final; - void visit(const luci::CircleFloor *) final; - void visit(const 
luci::CircleFloorDiv *) final; - void visit(const luci::CircleFloorMod *) final; - void visit(const luci::CircleFullyConnected *) final; - void visit(const luci::CircleGather *) final; - void visit(const luci::CircleGatherNd *) final; - void visit(const luci::CircleGreater *) final; - void visit(const luci::CircleGreaterEqual *) final; - void visit(const luci::CircleIf *) final; - void visit(const luci::CircleL2Normalize *) final; - void visit(const luci::CircleL2Pool2D *) final; - void visit(const luci::CircleLeakyRelu *) final; - void visit(const luci::CircleLess *) final; - void visit(const luci::CircleLessEqual *) final; - void visit(const luci::CircleLocalResponseNormalization *) final; - void visit(const luci::CircleLog *) final; - void visit(const luci::CircleLogicalAnd *) final; - void visit(const luci::CircleLogicalNot *) final; - void visit(const luci::CircleLogicalOr *) final; - void visit(const luci::CircleLogistic *) final; - void visit(const luci::CircleLogSoftmax *) final; - void visit(const luci::CircleMatrixDiag *) final; - void visit(const luci::CircleMatrixSetDiag *) final; - void visit(const luci::CircleMaximum *) final; - void visit(const luci::CircleMaxPool2D *) final; - void visit(const luci::CircleMean *) final; - void visit(const luci::CircleMinimum *) final; - void visit(const luci::CircleMirrorPad *) final; - void visit(const luci::CircleMul *) final; - void visit(const luci::CircleNeg *) final; - void visit(const luci::CircleNonMaxSuppressionV4 *) final; - void visit(const luci::CircleNonMaxSuppressionV5 *) final; - void visit(const luci::CircleNotEqual *) final; - void visit(const luci::CircleOneHot *) final; - void visit(const luci::CirclePack *) final; - void visit(const luci::CirclePad *) final; - void visit(const luci::CirclePadV2 *) final; - void visit(const luci::CirclePow *) final; - void visit(const luci::CirclePRelu *) final; - void visit(const luci::CircleQuantize *) final; - void visit(const luci::CircleRange *) final; - void 
visit(const luci::CircleRank *) final; - void visit(const luci::CircleReduceAny *) final; - void visit(const luci::CircleReduceMax *) final; - void visit(const luci::CircleReduceMin *) final; - void visit(const luci::CircleReduceProd *) final; - void visit(const luci::CircleRelu *) final; - void visit(const luci::CircleRelu6 *) final; - void visit(const luci::CircleReluN1To1 *) final; - void visit(const luci::CircleReshape *) final; - void visit(const luci::CircleResizeBilinear *) final; - void visit(const luci::CircleResizeNearestNeighbor *) final; - void visit(const luci::CircleReverseSequence *) final; - void visit(const luci::CircleReverseV2 *) final; - void visit(const luci::CircleRound *) final; - void visit(const luci::CircleRsqrt *) final; - void visit(const luci::CircleScatterNd *) final; - void visit(const luci::CircleSegmentSum *) final; - void visit(const luci::CircleSelect *) final; - void visit(const luci::CircleSelectV2 *) final; - void visit(const luci::CircleShape *) final; - void visit(const luci::CircleSin *) final; - void visit(const luci::CircleSlice *) final; - void visit(const luci::CircleSoftmax *) final; - void visit(const luci::CircleSpaceToBatchND *) final; - void visit(const luci::CircleSpaceToDepth *) final; - void visit(const luci::CircleSparseToDense *) final; - void visit(const luci::CircleSplit *) final; - void visit(const luci::CircleSplitV *) final; - void visit(const luci::CircleSqrt *) final; - void visit(const luci::CircleSquare *) final; - void visit(const luci::CircleSquaredDifference *) final; - void visit(const luci::CircleSqueeze *) final; - void visit(const luci::CircleStridedSlice *) final; - void visit(const luci::CircleSVDF *) final; - void visit(const luci::CircleSub *) final; - void visit(const luci::CircleSum *) final; - void visit(const luci::CircleTanh *) final; - void visit(const luci::CircleTile *) final; - void visit(const luci::CircleTopKV2 *) final; - void visit(const luci::CircleTranspose *) final; - void 
visit(const luci::CircleTransposeConv *) final; - void visit(const luci::CircleUnidirectionalSequenceLSTM *) final; - void visit(const luci::CircleUnique *) final; - void visit(const luci::CircleUnpack *) final; - void visit(const luci::CircleWhere *) final; - void visit(const luci::CircleWhile *) final; - void visit(const luci::CircleZerosLike *) final; - - // Circle Only - void visit(const luci::CircleBCQFullyConnected *) final; - void visit(const luci::CircleBCQGather *) final; - void visit(const luci::CircleInstanceNorm *) final; - - // NOTE CircleInput and CircleOutput are not handled here as these need - // link with graph I/O - - // Virtual - void visit(const luci::CircleCustomOut *) final; - void visit(const luci::CircleIfOut *) final; - // void visit(const luci::CircleInput *) final; - void visit(const luci::CircleNonMaxSuppressionV4Out *) final; - void visit(const luci::CircleNonMaxSuppressionV5Out *) final; - // void visit(const luci::CircleOutput *) final; - void visit(const luci::CircleOutputDummy *) final; - void visit(const luci::CircleOutputExclude *) final; - void visit(const luci::CircleSplitOut *) final; - void visit(const luci::CircleSplitVOut *) final; - void visit(const luci::CircleTopKV2Out *) final; - void visit(const luci::CircleUniqueOut *) final; - void visit(const luci::CircleUnpackOut *) final; - void visit(const luci::CircleVariable *) final; - void visit(const luci::CircleWhileOut *) final; - -public: - luci::CircleNode *find_clone(const luci::CircleNode *node); - -protected: - luci::CloneContext &_clonecontext; -}; - -/** - * @brief Connect cloned node from input node - */ -void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext); - -} // namespace luci - -#endif // __LUCI_PARTITION_CONNECT_NODE_H__ diff --git a/compiler/luci/partition/src/ConnectNode.test.h b/compiler/luci/partition/src/ConnectNode.test.h index ac4878a..18bb52a 100644 --- a/compiler/luci/partition/src/ConnectNode.test.h +++ 
b/compiler/luci/partition/src/ConnectNode.test.h @@ -17,7 +17,7 @@ #ifndef __CONNECT_NODE_TEST_H__ #define __CONNECT_NODE_TEST_H__ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include #include diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.cpp index a3fde4c..a7fbc37 100644 --- a/compiler/luci/partition/src/Nodes/CircleAbs.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAbs.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp index f3e7215..ac805c1 100644 --- a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.cpp index d393997..0754be6 100644 --- a/compiler/luci/partition/src/Nodes/CircleAdd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAdd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp index e457b83..99ae52c 100644 --- a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.cpp index 81e5e09..90aaeee 100644 --- a/compiler/luci/partition/src/Nodes/CircleAddN.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAddN.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp index 5d0a748..37743d3 100644 --- a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp index 1409586..99b30d3 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp index c816fbe..77248e0 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp index 6151aa9..1bb3d84 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp index d150be4..ed0cf03 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp index 5476657..1df86c7 100644 --- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp index fba2be8..266120b 100644 --- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp index 5b1dd85..6d50f0e 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp index 3d64f4b..2191f5b 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp index 90c4d9e..a9e810a 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp index bbbd3f1..0324d85 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp index c3992a6..5a459e7 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp index 94336d3..e6d26a6 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp index 2a463af..40b8f70 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp index 544f5e1..e9cb350 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCast.cpp b/compiler/luci/partition/src/Nodes/CircleCast.cpp index f7630cd..e1301aa 100644 --- a/compiler/luci/partition/src/Nodes/CircleCast.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCast.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp index 0051190..d7b679a 100644 --- a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.cpp index a0c9403..e7b5f5a 100644 --- a/compiler/luci/partition/src/Nodes/CircleCeil.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCeil.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp index dbd7e53..cb03648 100644 --- a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp index fb24d21..d895685 100644 --- a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp index 4d64b85..b5c05e2 100644 --- a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleConst.cpp b/compiler/luci/partition/src/Nodes/CircleConst.cpp index 118cd8d..b88f5ef 100644 --- a/compiler/luci/partition/src/Nodes/CircleConst.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConst.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp index 46716f0..ca9cce1 100644 --- a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp index 829adec..4596d96 100644 --- a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCos.cpp b/compiler/luci/partition/src/Nodes/CircleCos.cpp index 9dcf81e..76b1baa 100644 --- a/compiler/luci/partition/src/Nodes/CircleCos.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCos.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp index 6c92b93..ba806a3 100644 --- a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.cpp index ac16ebe..cc16048 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustom.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustom.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp index 9f40b52..f7fe866 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp index fee1a1a..0d83cff 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp index 0a29397..ddd4e93 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.cpp new file mode 100644 index 0000000..cfb236a --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleDensify.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/ConnectNode.h" + +namespace +{ + +void connect(luci::ConnectNode *cn, const luci::CircleDensify *node) +{ + auto *cloned = loco::must_cast(cn->find_clone(node)); + + luci::CircleNode *input = loco::must_cast(node->input()); + + cloned->input(cn->find_clone(input)); +} + +} // namespace + +namespace luci +{ + +void ConnectNode::visit(const luci::CircleDensify *node) { connect(this, node); } + +} // namespace luci diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 0000000..94076a8 --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/ConnectNode.h" + +#include "ConnectNode.test.h" + +#include + +#include + +namespace +{ + +using namespace luci::test; + +class NodeGraphlet : public NodeGraphletT +{ +public: + NodeGraphlet() = default; +}; + +class TestNodeGraph : public TestIOGraph, public NodeGraphlet +{ +public: + TestNodeGraph() = default; + +public: + void init(const ShapeU32 shape) + { + TestIOGraph::init(shape, shape); + NodeGraphlet::init(g()); + + node()->input(input()); + + output()->from(node()); + } +}; + +} // namespace + +TEST(ConnectNodeTest, connect_Densify) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast(clone)); + + cth.clone_connect(node, clone); + + ASSERT_EQ(1, clone->arity()); + ASSERT_EQ(cth.inputs(0), clone->arg(0)); +} + +TEST(ConnectNodeTest, connect_Densify_NEG) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs_miss(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast(clone)); + + EXPECT_ANY_THROW(cth.clone_connect(node, clone)); +} diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp index ade266e..c044b4c 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp index 997360a..1b61a35 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp index 19d1d5f..2bd9ab5 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp index 681f98b..02976a4 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp index 3a520d4..ac2642b 100644 --- a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp index 7f6006c..d3a43d3 100644 --- a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.cpp index 4803385..8941a41 100644 --- a/compiler/luci/partition/src/Nodes/CircleDiv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDiv.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp index 2269323..7900bea 100644 --- a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleElu.cpp b/compiler/luci/partition/src/Nodes/CircleElu.cpp index d21cd4c..b772265 100644 --- a/compiler/luci/partition/src/Nodes/CircleElu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleElu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp index 94774cc..20b2050 100644 --- a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.cpp index 6a126c0..2dc0e75 100644 --- a/compiler/luci/partition/src/Nodes/CircleEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp index 20b5391..c0d3bd9 100644 --- a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleExp.cpp b/compiler/luci/partition/src/Nodes/CircleExp.cpp index 95fb1cd..c1da790 100644 --- a/compiler/luci/partition/src/Nodes/CircleExp.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExp.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp index 16d7244..286f205 100644 --- a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp index 6fccd63..a6ce649 100644 --- a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp index 8a51565..37af10f 100644 --- a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp index 4855d80..5dfaee1 100644 --- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp index 3821d75..2a2ec0c 100644 --- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFill.cpp b/compiler/luci/partition/src/Nodes/CircleFill.cpp index 06fca7b..32688cd 100644 --- a/compiler/luci/partition/src/Nodes/CircleFill.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFill.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp index 97a5a34..4b3872a 100644 --- a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.cpp index 7ad3924..f7409a2 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloor.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloor.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp index 1a964ea..883d362 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp index 3b92b00..57e435c 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp index 3d28015..1eb603c 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp index 9f868d0..1b942d2 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp index 89a0941..680bf16 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp index da27303..206b47a 100644 --- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp index fc88204..39eea55 100644 --- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGather.cpp b/compiler/luci/partition/src/Nodes/CircleGather.cpp index 0ee4583..4f059cb 100644 --- a/compiler/luci/partition/src/Nodes/CircleGather.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGather.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp index 7f4e084..f427e04 100644 --- a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp index 4be05ca..6a9c3b4 100644 --- a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp index d673698..0207e91 100644 --- a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.cpp index 7bc2a14..9f4b18f 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreater.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreater.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp index 842370d..61d1f59 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp index 536a0ae..76130a8 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp index 76dc770..7e4e1ef 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleIf.cpp b/compiler/luci/partition/src/Nodes/CircleIf.cpp index 1672a13..45e4ec4 100644 --- a/compiler/luci/partition/src/Nodes/CircleIf.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIf.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp index dbd25c8..cbb7662 100644 --- a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp index 969bdd9..2eb5dda 100644 --- a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp index 9207654..ec2dde3 100644 --- a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp index 386652f..f64ffd8 100644 --- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp +++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp index b932223..4363c6c 100644 --- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp index 61ddba2..df26930 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp index 4fc2372..b114a15 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp index 24333d5..1eacddb 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp index 4032848..22f99d5 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp index 3da1ba2..1702dde 100644 --- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp index 5a0d1dd..71dc55e 100644 --- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLess.cpp b/compiler/luci/partition/src/Nodes/CircleLess.cpp index aab495f..52726f9 100644 --- a/compiler/luci/partition/src/Nodes/CircleLess.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLess.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp index ab65e5d..c5d194e 100644 --- a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp index ec129db..e9a3c41 100644 --- a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp index 0dd8986..29f4aba 100644 --- a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp index 6b0d1cd..7a00bf9 100644 --- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp index e197338..5e57238 100644 --- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLog.cpp b/compiler/luci/partition/src/Nodes/CircleLog.cpp index c43570f..676d22f 100644 --- a/compiler/luci/partition/src/Nodes/CircleLog.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLog.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp index 8a43f6f..0a2b975 100644 --- a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp index de582c8..c67b08f 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp index 1e60bf5..b6daeb7 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp index 28e8f42..1498d85 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp index a1189f0..0b95136 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp index e265782..f9c077e 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp index f6b3459..88dff36 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp index 418dc02..59592e4 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp index fee3f47..35f8029 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp index 7d78851..804597b 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp index c4b3f7f..241d840 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp index e92806a..297e9f2 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp index 03e3c3c..472cab8 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp index 29bb7fe..b327aac 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp index 5503ea1..4ff797c 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp index 75a665a..dee90e5 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp index 1699649..949e0d7 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp index 2ba6055..459917e 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp index 370174c..e6a6d57 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMean.cpp b/compiler/luci/partition/src/Nodes/CircleMean.cpp index b634e58..c704d00 100644 --- a/compiler/luci/partition/src/Nodes/CircleMean.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMean.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp index 53435d9..838d7ae 100644 --- a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp index cdf7575..8958bf6 100644 --- a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp index 2fe6b0d..a6c86a2 100644 --- a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp index 16a24ab..91c3cb9 100644 --- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp index 605a126..b837e10 100644 --- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMul.cpp b/compiler/luci/partition/src/Nodes/CircleMul.cpp index 2cd2b40..12e1472 100644 --- a/compiler/luci/partition/src/Nodes/CircleMul.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMul.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp index 99cf082..b316679 100644 --- a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.cpp index 413ad49..e9dcc45 100644 --- a/compiler/luci/partition/src/Nodes/CircleNeg.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNeg.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp index bd74a36..ab13c94 100644 --- a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp index 63ff3f0..88d72e1 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp index 2771aef..e796a14 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp index 80e4704..61caa3a 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp index 5a0a8da..eb04f26 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp index c1f1177..3b0b755 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp index 1f20fbb..c9c31b3 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp index 69e3cc8..3eed260 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp index e001b0b..2c5822f 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp index c40c2a2..29a6a43 100644 --- a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp index 360940c..2983e1b 100644 --- a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp index d76f492..d172fb8 100644 --- a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp index 3c555c2..59780e4 100644 --- a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp index a033e80..61d7620 100644 --- a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp index 106eb40..36ce350 100644 --- a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp index b8a2341..6a23257 100644 --- a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp index e5bcedc..f2a2e2c 100644 --- a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePack.cpp b/compiler/luci/partition/src/Nodes/CirclePack.cpp index 3268810..d4b49bf 100644 --- a/compiler/luci/partition/src/Nodes/CirclePack.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePack.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp index 68c5138..665b137 100644 --- a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePad.cpp b/compiler/luci/partition/src/Nodes/CirclePad.cpp index eb2a89c..0a1d6f7 100644 --- a/compiler/luci/partition/src/Nodes/CirclePad.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePad.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp index 24ea83f..72f97d6 100644 --- a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp index 001fecb..969cc27 100644 --- a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp index aea8e0c..9829f62 100644 --- a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePow.cpp b/compiler/luci/partition/src/Nodes/CirclePow.cpp index fb180ee..ce69e74 100644 --- a/compiler/luci/partition/src/Nodes/CirclePow.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePow.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp index 7a5be4d..f4e49c0 100644 --- a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp index 340c1da..903a94e 100644 --- a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp index 1f348b4..5ca1a6b 100644 --- a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRange.cpp b/compiler/luci/partition/src/Nodes/CircleRange.cpp index f295338..fa1a02c 100644 --- a/compiler/luci/partition/src/Nodes/CircleRange.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRange.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp index 59a95f1..b5b0c8a 100644 --- a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRank.cpp b/compiler/luci/partition/src/Nodes/CircleRank.cpp index f7cce76..35b4764 100644 --- a/compiler/luci/partition/src/Nodes/CircleRank.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRank.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp index 74c520b..5a0a71a 100644 --- a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp index ed762db..262e12a 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp index 792f511..45c2920 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp index 09586ec..d91c78e 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp index 8fbaf65..2ad18f3 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp index 105214d..65fca6a 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp index c37d624..db48f54 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp index 2fb4e3e..daac168 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp index cc1ac83..f5f69f0 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.cpp index d3617bd..63ac31b 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp index ccaf576..ec4d10f 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp index fb9ba6f..c2956c4 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp index 1341b0e..e9ecbe2 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp index 476195b..1141297 100644 --- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp index 7dc63c6..ae60a97 100644 --- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.cpp index e596704..49f7c64 100644 --- a/compiler/luci/partition/src/Nodes/CircleReshape.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReshape.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp index 73cbbdf..198cfa1 100644 --- a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp index 0f50401..41fdedf 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp index c2d8b71..437e448 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp index c985b7f..567db49 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp index 9cc2e55..5dc99a3 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp index 225d29e..348cdbb 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp index 408fc0c..7519103 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp index d59a7de..4b8c4a4 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp index d41ad8e..351c6f2 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRound.cpp b/compiler/luci/partition/src/Nodes/CircleRound.cpp index 9170bcd..97d0028 100644 --- a/compiler/luci/partition/src/Nodes/CircleRound.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRound.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp index fad0904..02f335d 100644 --- a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp index 03e64aa..44abd5e 100644 --- a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp index d76b96e..39ae1f8 100644 --- a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp index f661a79..e2b99c4 100644 --- a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp index 5fae520..af8cd55 100644 --- a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp index 62912b7..88a3ecf 100644 --- a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp index f271f88..4ce7875 100644 --- a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp index 5fc320a..6540416 100644 --- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp index a6bcff2..453b7cc 100644 --- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.cpp index dbe1dd4..436e956 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelect.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelect.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp index 912934b..2a38de5 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp index 28072c8..a8b6ab5 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp index e8d128e..c2ebdbe 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleShape.cpp b/compiler/luci/partition/src/Nodes/CircleShape.cpp index f93cf14..2fb3dcd 100644 --- a/compiler/luci/partition/src/Nodes/CircleShape.cpp +++ b/compiler/luci/partition/src/Nodes/CircleShape.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp index 9b4afdc..38033a3 100644 --- a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSin.cpp b/compiler/luci/partition/src/Nodes/CircleSin.cpp index 62c776e..0ef6059 100644 --- a/compiler/luci/partition/src/Nodes/CircleSin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSin.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp index fbee6f6..e141b45 100644 --- a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.cpp index 7895d9e..811d81f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSlice.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSlice.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp index 3c666ad..0718c7f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp index 0a93787..6b08f00 100644 --- a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp index b256298..571ad80 100644 --- a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp index b94948b..dc48b36 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp index 279e9b2..0fcf22f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp index bd4523c..55d562f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp index 207163d..771c1f3 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp index d1ed188..cc2f5e9 100644 --- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp index 2257186..06b3814 100644 --- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.cpp index d6d62a8..5f851f0 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplit.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplit.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp index d8d0953..a4242b9 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp index 4021f20..1a44758 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp index 85fe268..b7cf6fc 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp index f132057..43ebe07 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp index 3ac1d6c..877a447 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp index 2034805..4bac6c5 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp index 434dfb0..b3cf4d9 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp index f737aac..fd6d0ec 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp index fa7f7fe..be29883 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.cpp index 1476a86..56dd544 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquare.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquare.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp index bb6a7c3..a509b31 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp index 40dd317..e47be2c 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp index 9cfe9ee..a900f1d 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp index bc9fda2..ffe3c91 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp index 1f09710..7a6e2bf 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp index 3bdca8a..953b451 100644 --- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp +++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp index 130ff91..3e950fd 100644 --- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSub.cpp b/compiler/luci/partition/src/Nodes/CircleSub.cpp index 8ac294b..c5bea08 100644 --- a/compiler/luci/partition/src/Nodes/CircleSub.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSub.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp index 7c0d837..ca51865 100644 --- a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSum.cpp b/compiler/luci/partition/src/Nodes/CircleSum.cpp index bef1d46..e929fd0 100644 --- a/compiler/luci/partition/src/Nodes/CircleSum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp index 1ed65c0..21f6bbb 100644 --- a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.cpp index e6c56eb..ef5c2c9 100644 --- a/compiler/luci/partition/src/Nodes/CircleTanh.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTanh.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp index 17cd487..1e2d062 100644 --- a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTile.cpp b/compiler/luci/partition/src/Nodes/CircleTile.cpp index 0381b4d..0c21743 100644 --- a/compiler/luci/partition/src/Nodes/CircleTile.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTile.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp index 79d1ba1..9449c1f 100644 --- a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp index ce8a6f5..41dfa9c 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp index f08f3f3..e0c4a3a 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp index 6ca6e3d..19f0fa7 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp index a5c1c43..ba085f6 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp index 1cbb546..cbbdb00 100644 --- a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp index b3b1630..8476838 100644 --- a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp index 469cc9a..6b6819d 100644 --- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp index ee9fb0e..68adaad 100644 --- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp index 3f0374a..3323014 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp index aeefef0..2630461 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.cpp index 79ca594..c035b7e 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnique.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnique.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp index 23f2998..910087a 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp index f244dd6..23b1aba 100644 --- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp index 8876407..9549574 100644 --- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp index f83c5d8..43ebcb4 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp index b164cc3..444b043 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp index b8982ff..ee1de15 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp index 9ed4409..2aaef8d 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp index f7f6f21..e7a794a 100644 --- a/compiler/luci/partition/src/Nodes/CircleVariable.cpp +++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.cpp index 8ef2742..d0fc846 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhere.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhere.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp index 942f804..f17131c 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.cpp index 7820aca..95b77f7 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhile.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhile.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp index bffb786..6ee7aba 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp index 1cb4419..5cd6835 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp index 901f31b..f58eba0 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp index 715042d..795d88d 100644 --- a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp +++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp index 74c873c..f887bc3 100644 --- a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/PartitionIR.cpp b/compiler/luci/partition/src/PartitionIR.cpp index 60dc74f..969fa70 100644 --- a/compiler/luci/partition/src/PartitionIR.cpp +++ b/compiler/luci/partition/src/PartitionIR.cpp @@ -64,7 +64,7 @@ std::unique_ptr PGroups::make_copy(void) const // note: d_pgroup is now nullptr as it's moved } - return std::move(d_pgroups); + return d_pgroups; } GroupKey PGroups::group_of(luci::CircleNode *node) const diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp index 4c3971b..aa8a827 100644 --- a/compiler/luci/partition/src/PartitionMerge.cpp +++ b/compiler/luci/partition/src/PartitionMerge.cpp @@ -255,7 +255,7 @@ std::unique_ptr merge_pgroups(const luci::PGroups *s_pgroups) } } while (changed); - return std::move(d_pgroups); + return d_pgroups; } } // namespace luci diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp index eaeacf9..2e95f08 100644 --- a/compiler/luci/partition/src/PartitionPGroups.cpp +++ b/compiler/luci/partition/src/PartitionPGroups.cpp @@ -257,7 +257,7 @@ std::unique_ptr produce_pgroups(const luci::Module *source, } } - return std::move(pgroups); + return pgroups; } } // namespace luci diff --git a/compiler/luci/partition/src/PartitionPModules.cpp b/compiler/luci/partition/src/PartitionPModules.cpp index beaaf60..251dbea 100644 --- a/compiler/luci/partition/src/PartitionPModules.cpp +++ b/compiler/luci/partition/src/PartitionPModules.cpp @@ -15,7 +15,7 @@ */ #include "PartitionPModules.h" -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "luci/Service/CircleNodeClone.h" #include "luci/Log.h" @@ -156,7 +156,7 @@ std::unique_ptr clone_graph(loco::Graph *graph_org, luci::CloneCont add_graph_output(graph_clone, output_clone); } - return std::move(graph); + return graph; } void 
clone_recursive_subgraphs(luci::PartedModule &pm, loco::Graph *graph, diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt index 5237c6d..d9d004d 100644 --- a/compiler/luci/pass/CMakeLists.txt +++ b/compiler/luci/pass/CMakeLists.txt @@ -1,9 +1,16 @@ nnas_find_package(FlatBuffers EXACT 2.0 QUIET) +nnas_find_package(Fp16Source QUIET) + if(NOT FlatBuffers_FOUND) message(STATUS "FlatBuffers NOT FOUND") return() endif(NOT FlatBuffers_FOUND) +if(NOT Fp16Source_FOUND) + message(STATUS "Fp16Source NOT FOUND") + return() +endif(NOT Fp16Source_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) @@ -14,6 +21,7 @@ endif(NOT LUCI_LIBRARY_TYPE) add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_pass PRIVATE src) +target_include_directories(luci_pass PRIVATE ${Fp16Source_DIR}/include) target_include_directories(luci_pass PUBLIC include) target_link_libraries(luci_pass PUBLIC loco) target_link_libraries(luci_pass PUBLIC logo_core) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index c803898..b94822c 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -47,8 +47,10 @@ public: ResolveCustomOpBatchMatMul, ResolveCustomOpMatMul, ResolveCustomOpMaxPoolWithArgmax, + ResolveCustomOpSplitV, FoldAddV2, FoldCast, + FoldDensify, FoldDepthwiseConv2D, FoldDequantize, FoldGather, @@ -61,6 +63,7 @@ public: ShuffleWeightTo16x1Float32, RemoveRedundantTranspose, ReplaceMulAddWithDepthwiseConv, + ReplaceNonConstFCWithBatchMatMul, ReplaceSubWithAdd, SubstitutePackToReshape, SubstitutePadV2ToPad, diff --git a/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h new file mode 100644 index 0000000..8ec81b1 --- /dev/null +++ 
b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_FOLD_DENSIFY_PASS_H__ +#define __LUCI_FOLD_DENSIFY_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to Fold Densify if input is Sparse Constant + * + */ +struct FoldDensifyPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FoldDensifyPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FOLD_DENSIFY_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h new file mode 100644 index 0000000..2deb752 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ +#define __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to remove redundant dequantize operations + */ +struct RemoveRedundantDequantizePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveRedundantDequantizePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h new file mode 100644 index 0000000..19948a3 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ +#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to remove unnecessary Reshape nodes. + * @details This class will remove unnecessary pre/post-Reshape nodes. + * See https://github.com/Samsung/ONE/issues/9600 for more details. 
+ */ +struct RemoveUnnecessaryReshapeNetPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveUnnecessaryReshapeNetPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h new file mode 100644 index 0000000..24e16ec --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ +#define __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to replace "FC with non-const weight" with Batched MatMul + */ +struct ReplaceNonConstFCWithBatchMatMulPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ReplaceNonConstFCWithBatchMatMulPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h new file mode 100644 index 0000000..d4f0147 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ +#define __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to resolve certain custom op of subgraph into splitv op in circle schema. 
+ */ +struct ResolveCustomOpSplitVPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ResolveCustomOpSplitVPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 6dbb22d..74c569d 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -20,6 +20,7 @@ #include "luci/Pass/ExpandBroadcastConstPass.h" #include "luci/Pass/FoldAddV2Pass.h" #include "luci/Pass/FoldCastPass.h" +#include "luci/Pass/FoldDensifyPass.h" #include "luci/Pass/FoldDepthwiseConv2DPass.h" #include "luci/Pass/FoldDequantizePass.h" #include "luci/Pass/FoldGatherPass.h" @@ -43,15 +44,18 @@ #include "luci/Pass/RemoveRedundantTransposePass.h" #include "luci/Pass/RemoveRedundantQuantizePass.h" #include "luci/Pass/RemoveUnnecessaryReshapePass.h" +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" #include "luci/Pass/RemoveUnnecessarySlicePass.h" #include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h" #include "luci/Pass/RemoveUnnecessarySplitPass.h" +#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h" #include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" #include "luci/Pass/ReplaceSubWithAddPass.h" #include "luci/Pass/ResolveCustomOpAddPass.h" #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" #include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h" +#include "luci/Pass/ResolveCustomOpSplitVPass.h" #include "luci/Pass/SparsifyTensorPass.h" #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" #include "luci/Pass/SubstitutePackToReshapePass.h" @@ -127,7 +131,8 @@ bool OptimizeOptionsImpl::query(Algorithm algo) return true; } -void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output) +// TODO Make a struct for args +void 
convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output, bool fuse_fc) { logo::Phase phase; @@ -135,6 +140,21 @@ void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_out phase.emplace_back(std::make_unique()); phase.emplace_back(std::make_unique()); + // Resolve custom Ops + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + + // Fuse FullyConnected with Add + // Why we perform FuseAddWithFullyConnectedPass before ConvertNCHWToNHWCPass? + // FullyConnected Op's layout is not changed in ConvertNCHWToNHWCPass, while + // Add Op's layer is changed from NCHW to NHWC. + // This disables fusion of Add and FullyConnected after ConvertNCHWToNHWC. + if (fuse_fc) + phase.emplace_back(std::make_unique()); + phase.emplace_back( std::make_unique(preserve_input, preserve_output)); @@ -190,7 +210,9 @@ void CircleOptimizer::optimize(loco::Graph *g) const bool preserve_output = _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_output_shape) != "true"; - convert_nchw_to_nhwc(g, preserve_input, preserve_output); + bool fuse_fc = _options->query(Options::Algorithm::FuseAddWithFullyConnected); + + convert_nchw_to_nhwc(g, preserve_input, preserve_output, fuse_fc); } /* TRANSFORM DECLARATION BEGIN */ @@ -220,6 +242,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } + if (_options->query(Options::Algorithm::ResolveCustomOpSplitV)) + { + phase.emplace_back(std::make_unique()); + } if (_options->query(Options::Algorithm::FuseInstanceNorm)) { phase.emplace_back(std::make_unique()); @@ -260,6 +286,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } + if (_options->query(Options::Algorithm::FoldDensify)) + { + phase.emplace_back(std::make_unique()); + } if 
(_options->query(Options::Algorithm::FoldDepthwiseConv2D)) { phase.emplace_back(std::make_unique()); @@ -307,6 +337,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape)) { phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); } if (_options->query(Options::Algorithm::RemoveUnnecessarySlice)) { @@ -332,6 +363,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } + if (_options->query(Options::Algorithm::ReplaceNonConstFCWithBatchMatMul)) + { + phase.emplace_back(std::make_unique()); + } if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv)) { phase.emplace_back(std::make_unique()); diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp index ce38a90..9a6550b 100644 --- a/compiler/luci/pass/src/CircleQuantizer.cpp +++ b/compiler/luci/pass/src/CircleQuantizer.cpp @@ -22,6 +22,7 @@ #include "luci/Pass/RequantizePass.h" #include "luci/Pass/ConvertToFakeQuantizedModelPass.h" #include "luci/Pass/FoldDequantizePass.h" +#include "luci/Pass/RemoveRedundantDequantizePass.h" #include "luci/Pass/QuantizePreCheckerPass.h" #include "luci/Pass/QuantizeWithMinMaxPass.h" #include "luci/Pass/QuantizeDequantizeWeightsPass.h" @@ -252,8 +253,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const static const std::vector qwmm_supported_input_model_dtype{"float32"}; static const std::vector qwmm_supported_output_model_dtype{"uint8", "int16"}; static const std::vector qwmm_supported_granularity{"layer", "channel"}; - static const std::vector qwmm_supported_input_type{"uint8", "int16"}; - static const std::vector qwmm_supported_output_type{"uint8", "int16"}; + static const std::vector qwmm_supported_input_type{"uint8", "int16", "float32"}; + static const std::vector qwmm_supported_output_type{"uint8", "int16", "float32"}; auto input_model_dtype = 
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); @@ -434,6 +435,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const phase.emplace_back(std::make_unique()); phase.emplace_back(std::make_unique()); + // Remove redundant Dequantize Ops generated during fake quantization + phase.emplace_back(std::make_unique()); // Fold Dequantize Ops generated during fake quantization phase.emplace_back(std::make_unique()); diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp index ce4f540..55a29d1 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp @@ -28,6 +28,69 @@ namespace { +// Return true if from can be broadcasted to to +// to's shape is [N, C, H, W] +bool broadcastable(const luci::CircleConst *from, const luci::CircleNode *to) +{ + assert(to->rank() == 4); // FIX_CALLER_UNLESS + + const auto from_rank = from->rank(); + if (from_rank > 4) + return false; + + // Scalar is always broadcastable + if (from_rank == 0) + return true; + + for (uint32_t i = 1; i <= from_rank; i++) + { + auto to_index = 4 - i; + auto from_index = from_rank - i; + + if (from->dim(from_index).value() != to->dim(to_index).value() and + from->dim(from_index).value() != 1) + return false; + } + + return true; +} + +// Expand node to rank 4 +// node should have rank less than or equal to 4 +void expand_to_rank_4(luci::CircleConst *node) +{ + auto original_rank = node->rank(); + + assert(original_rank <= 4); // FIX_CALLER_UNLESS + + if (original_rank == 4) + return; + + std::vector original_shape; + for (uint32_t i = 0; i < original_rank; i++) + { + original_shape.emplace_back(node->dim(i).value()); + } + + node->rank(4); + for (uint32_t i = 0; i < (4 - original_rank); i++) + node->dim(i) = 1; + + for (uint32_t i = 0; i < original_rank; i++) + node->dim(i + (4 - original_rank)) = original_shape.at(i); +} + +bool is_output(const loco::Node *node) +{ + auto 
cnode = loco::must_cast(node); + auto opcode = cnode->opcode(); + if (opcode == luci::CircleOpcode::CIRCLEOUTPUT || + opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE) + return true; + + return false; +} + bool is_same_shape(const luci::CircleNode *node, const std::vector &shape) { if (not node) @@ -484,7 +547,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node, // // Find MUL with an NCHW pattern described below // - Input (non-constant) shape : [N, C, H, W] -// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1) +// - Input (constant) shape : broadcastable to [N, C, H, W] // - Output shape : [N, C, H, W] bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node, luci::CircleConst *&multiplier) @@ -511,32 +574,12 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod if (pred_node->rank() != 4) return false; - const auto const_rank = multiplier->rank(); - // Support Rank 4 or scalar (rank 0 or 1) - if (const_rank != 4 && const_rank != 0 && const_rank != 1) + if (not broadcastable(multiplier, node)) return false; - const auto input_cdim = pred_node->dim(1); - const auto output_cdim = node->dim(1); - - if (const_rank == 4) - { - bool supported_shape = false; - - // Check multiplier is (1, C, 1, 1) - if (is_same_shape(multiplier, {1, node->dim(1), 1, 1})) - supported_shape = true; - - // Check multiplier is (N, C, H, W) - if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) - supported_shape = true; + expand_to_rank_4(multiplier); - return supported_shape; - } - if (input_cdim == output_cdim) - return true; - else - return false; + return true; } // We assume ADD with const input is NCHW if, @@ -569,32 +612,12 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod if (pred_node->rank() != 4) return false; - const auto const_rank = beta->rank(); - // Support Rank 4 or scalar (rank 0 or 1) - if (const_rank != 4 
&& const_rank != 0 && const_rank != 1) + if (not broadcastable(beta, node)) return false; - const auto input_cdim = pred_node->dim(1); - const auto output_cdim = node->dim(1); - - if (const_rank == 4) - { - bool supported_shape = false; - - // Check beta is (1, C, 1, 1) - if (is_same_shape(beta, {1, node->dim(1), 1, 1})) - supported_shape = true; - - // Check beta is (N, C, H, W) - if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) - supported_shape = true; + expand_to_rank_4(beta); - return supported_shape; - } - if (input_cdim == output_cdim) - return true; - else - return false; + return true; } // We assume SUB with const input is NCHW if, @@ -675,6 +698,24 @@ template bool convert_unary_x(T *node) return true; } +template bool convert_unary_logits(T *node) +{ + const auto pred_node = loco::must_cast(node->logits()); + auto pre_trans = create_pre_transpose(node); + pre_trans->a(pred_node); + node->logits(pre_trans); + + // Do shape inference for this node again. 
+ node->shape_status(luci::ShapeStatus::UNDEFINED); + + auto post_trans = create_post_transpose(node); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; +} + class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor { // Default @@ -742,17 +783,14 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor if (is_NCHW_with_const(node, pred_node, beta)) { + assert(beta->rank() == 4); // FIX is_NCHW_with_const unless + auto nhwc_const = create_NHWC_from_NCHW(beta); + if (nhwc_const == nullptr) + return false; + node->y(nhwc_const); + auto pre_trans = create_pre_transpose(node); pre_trans->a(pred_node); - - if (beta->rank() == 4) - { - auto nhwc_const = create_NHWC_from_NCHW(beta); - if (nhwc_const == nullptr) - return false; - node->y(nhwc_const); - } - node->x(pre_trans); } else if (beta == nullptr) @@ -816,6 +854,11 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor bool visit(luci::CircleLogistic *node) { return convert_unary_x(node); } + bool visit(luci::CircleLogSoftmax *node) + { + return convert_unary_logits(node); + } + bool visit(luci::CircleMaximum *node) { luci::CircleNode *pred_node = nullptr; @@ -954,15 +997,15 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor if (is_NCHW_with_const(node, pred_node, multiplier)) { + assert(multiplier->rank() == 4); // FIX is_NCHW_with_const unless + auto nhwc_const = create_NHWC_from_NCHW(multiplier); + if (nhwc_const == nullptr) + return false; + node->y(nhwc_const); + auto pre_trans = create_pre_transpose(node); pre_trans->a(pred_node); node->x(pre_trans); - - if (multiplier->rank() == 4) - { - auto nhwc_const = create_NHWC_from_NCHW(multiplier); - node->y(nhwc_const); - } } else if (multiplier == nullptr) { @@ -1049,12 +1092,127 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor return true; } + // TODO Reduce duplicate code with CircleMean + bool visit(luci::CircleReduceMax *node) + { + auto 
input = loco::must_cast(node->input()); + if (input->rank() != 4) + return false; + + auto rindices = dynamic_cast(node->reduction_indices()); + if (not rindices) + return false; + + auto nhwc_rindices = create_NHWC_rindices(rindices); + if (not nhwc_rindices) + return false; + + auto pre_trans = create_pre_transpose(node); + pre_trans->a(input); + node->input(pre_trans); + + // Do shape inference for this node again. + node->shape_status(luci::ShapeStatus::UNDEFINED); + + node->reduction_indices(nhwc_rindices); + + if (node->keep_dims()) + { + auto post_trans = create_post_transpose(node); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; + } + + // The below codes handle the cases where node->keep_dims() == false + // 1D output never needs a transpose + if (node->rank() <= 1) + return true; + + std::vector reduced_dims_nhwc(4, false); + uint32_t num_reduced_indices = nhwc_rindices->size(); + + for (uint32_t ri = 0; ri < num_reduced_indices; ++ri) + { + reduced_dims_nhwc[nhwc_rindices->at(ri)] = true; + } + + // if channel dimension has been reduced, we don't need a transpose + if (reduced_dims_nhwc[3]) + return true; + + // likewise, if both space dimensions are reduced, no transpose is needed + if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2]) + return true; + + std::vector post_trans_ind; + // case 1: only N is reduced + if (num_reduced_indices == 1 && reduced_dims_nhwc[0]) + post_trans_ind = {2, 0, 1}; + + // case 2: only H or W is reduced + if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2])) + post_trans_ind = {0, 2, 1}; + + // case 3: N and either H or W are reduced + if (num_reduced_indices == 2) + post_trans_ind = {1, 0}; + + auto post_trans = create_Nd_transpose(node, post_trans_ind); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; + } + bool visit(luci::CircleRelu *node) { return convert_unary_features(node); } bool visit(luci::CircleRelu6 *node) { return 
convert_unary_features(node); } bool visit(luci::CircleRsqrt *node) { return convert_unary_x(node); } + bool visit(luci::CircleSoftmax *node) { return convert_unary_logits(node); } + + bool visit(luci::CircleSplitV *node) + { + // Change split dimension + auto axis = dynamic_cast(node->split_dim()); + if (not axis) + return false; + + if (axis->dtype() != loco::DataType::S32) + return false; + + if (axis->size() != 1) + return false; + + axis->at(0) = nchw_axis_to_nhwc(axis->at(0)); + + // Insert pre-transpose + const auto pred_node = loco::must_cast(node->input()); + auto pre_trans = create_pre_transpose(node); + pre_trans->a(pred_node); + node->input(pre_trans); + + // Do shape inference for this node again. + node->shape_status(luci::ShapeStatus::UNDEFINED); + + // Insert post-transposes + for (auto succ : loco::succs(node)) + { + auto svo = loco::must_cast(succ); + + auto post_trans = create_post_transpose(svo); + loco::replace(svo).with(post_trans); + post_trans->a(svo); + } + + return true; + } + bool visit(luci::CircleSquaredDifference *node) { // TODO support CircleConst input @@ -1195,6 +1353,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) // pre-Transpose --- [intermediate Ops] --- post-Transpose // | // +--[intermediate Ops] --- post-Transpose + // + // NOTE Intermediate Ops SHOULD NOT contain pre-Transpose/Reshape for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { if (has_data_format(node)) @@ -1202,25 +1362,51 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) if (is_pre_transpose(node) || is_pre_reshape(node)) { + std::set intermediate; + + // Variable to check intermediate Ops contain pre-Transpose/Reshape + bool has_pre = false; + + // Variable to check the pattern is closed with post-Transpose/Reshape + bool is_closed = true; + // For recursive call of lambda - std::function set_data_format_to_succs; - set_data_format_to_succs = [&](loco::Node *n) { + std::function collect_intermediate; + collect_intermediate = [&](loco::Node 
*n) { for (auto succ : loco::succs(n)) { // Exit condition if (is_post_transpose(succ) || is_post_reshape(succ)) continue; - if (not has_data_format(succ)) + if (is_pre_transpose(succ) || is_pre_reshape(succ)) + { + has_pre = true; + break; + } + + if (is_output(succ)) { - set_data_format(succ, DataFormat::NHWC); + is_closed = false; + break; } - set_data_format_to_succs(succ); + intermediate.emplace(succ); + + collect_intermediate(succ); } }; - set_data_format_to_succs(node); + collect_intermediate(node); + + if (has_pre or not is_closed) + continue; + + for (auto inter : intermediate) + { + if (not has_data_format(inter)) + set_data_format(inter, DataFormat::NHWC); + } } } @@ -1248,6 +1434,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) case luci::CircleOpcode::ELU: case luci::CircleOpcode::LEAKY_RELU: case luci::CircleOpcode::LOGISTIC: + case luci::CircleOpcode::LOG_SOFTMAX: case luci::CircleOpcode::MAXIMUM: case luci::CircleOpcode::MEAN: case luci::CircleOpcode::MINIMUM: @@ -1255,9 +1442,12 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) case luci::CircleOpcode::NEG: case luci::CircleOpcode::PAD: case luci::CircleOpcode::PADV2: + case luci::CircleOpcode::REDUCE_MAX: case luci::CircleOpcode::RELU: case luci::CircleOpcode::RELU6: case luci::CircleOpcode::RSQRT: + case luci::CircleOpcode::SOFTMAX: + case luci::CircleOpcode::SPLIT_V: case luci::CircleOpcode::SQUARED_DIFFERENCE: case luci::CircleOpcode::SUB: if (!has_data_format(node)) @@ -1296,7 +1486,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) if (circle_node->rank() != 4) { // TODO replace the check above with the input rank check, and remove the condition below - if (not dynamic_cast(node)) + if (not dynamic_cast(node) and + not dynamic_cast(node)) continue; } diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp index dd81d13..6bb3d32 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp +++ 
b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp @@ -16,6 +16,8 @@ #include +#include + #include "luci/Pass/ConvertNCHWToNHWCPass.h" #include "luci/Pass/CircleShapeInferencePass.h" @@ -23,6 +25,8 @@ #include +using namespace luci::test; + namespace { @@ -202,6 +206,173 @@ public: luci::CircleConst *post_shape = nullptr; }; +/** + * Graph with pre-Reshape but no post-Transpose/Reshape. + * + * BEFORE + * [Input] + * | + * [Pre-Reshape] + * | + * [Relu] + * | + * [Output] + * + * AFTER + * [Input] + * | + * [Pre-Reshape] + * | + * [Pre-Transpose] + * | + * [Relu] + * | + * [Post-Transpose] + * | + * [Output] + */ +class NoPostReshapeGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + relu = g.nodes()->create(); + pre_reshape = g.nodes()->create(); + pre_shape = g.nodes()->create(); + + pre_shape->dtype(loco::DataType::S32); + + uint32_t channel_size = 16; + auto in = loco::must_cast(input); + in->shape({1, channel_size, 4, 4}); + pre_shape->shape({4}); + + pre_shape->size(4); + pre_shape->at(0) = 1; + pre_shape->at(1) = 4; + pre_shape->at(2) = 4; + pre_shape->at(3) = channel_size; + + pre_reshape->tensor(input); + pre_reshape->shape(pre_shape); + relu->features(pre_reshape); + + relu->name("Relu"); + pre_reshape->name("pre-reshape"); + + return relu; + } + +public: + luci::CircleRelu *relu = nullptr; + luci::CircleReshape *pre_reshape = nullptr; + luci::CircleConst *pre_shape = nullptr; +}; + +/** + * Graph with two pre-Reshapes + * + * BEFORE + * [Input] + * | + * [Pre-Reshape] + * | + * [Relu] + * | + * [Pre-Reshape] + * | + * [Post-Reshape] + * | + * [Output] + * + * AFTER + * [Input] + * | + * [Pre-Reshape] + * | + * [Pre-Transpose] + * | + * [Relu] + * | + * [Post-Transpose] + * | + * [Pre-Reshape] + * | + * [Post-Reshape] + * | + * [Output] + */ +class ReluNotClosedGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + relu = 
g.nodes()->create(); + pre_reshape = g.nodes()->create(); + pre_reshape_2 = g.nodes()->create(); + post_reshape = g.nodes()->create(); + pre_shape = g.nodes()->create(); + pre_shape_2 = g.nodes()->create(); + post_shape = g.nodes()->create(); + + pre_shape->dtype(loco::DataType::S32); + pre_shape_2->dtype(loco::DataType::S32); + post_shape->dtype(loco::DataType::S32); + + uint32_t channel_size = 16; + auto in = loco::must_cast(input); + in->shape({1, channel_size, 4, 4}); + pre_shape->shape({4}); + pre_shape_2->shape({4}); + post_shape->shape({4}); + + pre_shape->size(4); + pre_shape->at(0) = 1; + pre_shape->at(1) = 4; + pre_shape->at(2) = 4; + pre_shape->at(3) = channel_size; + + pre_shape_2->size(4); + pre_shape_2->at(0) = 1; + pre_shape_2->at(1) = 4; + pre_shape_2->at(2) = channel_size; + pre_shape_2->at(3) = 4; + + post_shape->size(4); + post_shape->at(0) = 1; + post_shape->at(1) = 4; + post_shape->at(2) = 4; + post_shape->at(3) = channel_size; + + pre_reshape->tensor(input); + pre_reshape->shape(pre_shape); + + relu->features(pre_reshape); + + pre_reshape_2->tensor(relu); + pre_reshape_2->shape(pre_shape_2); + + post_reshape->tensor(pre_reshape_2); + post_reshape->shape(post_shape); + + relu->name("Relu"); + pre_reshape->name("pre-reshape"); + pre_reshape->name("pre-reshape-2"); + post_reshape->name("post-reshape"); + + return post_reshape; + } + +public: + luci::CircleRelu *relu = nullptr; + luci::CircleReshape *pre_reshape = nullptr; + luci::CircleReshape *pre_reshape_2 = nullptr; + luci::CircleReshape *post_reshape = nullptr; + luci::CircleConst *pre_shape = nullptr; + luci::CircleConst *pre_shape_2 = nullptr; + luci::CircleConst *post_shape = nullptr; +}; + class AddScalarGraph final : public SimpleGraph { protected: @@ -312,6 +483,22 @@ public: luci::CircleLogistic *logistic = nullptr; }; +class LogSoftmaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + log_softmax = g.nodes()->create(); + 
log_softmax->logits(input); + log_softmax->name("log_softmax"); + + return log_softmax; + } + +public: + luci::CircleLogSoftmax *log_softmax = nullptr; +}; + class MaximumGraph final : public SimpleGraph { protected: @@ -642,6 +829,51 @@ public: luci::CircleConst *const_value = nullptr; }; +class ReduceMaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + rm = g.nodes()->create(); + rindices = g.nodes()->create(); + + rm->dtype(loco::DataType::FLOAT32); + rindices->dtype(loco::DataType::S32); + + rm->shape(_shape); + rindices->shape({static_cast(_axes.size())}); + + rindices->size(_axes.size()); + for (uint32_t i = 0; i < _axes.size(); ++i) + { + rindices->at(i) = _axes[i]; + } + + rm->input(input); + rm->reduction_indices(rindices); + rm->keep_dims(_keep_dims); + + rm->name("reduce_max"); + rindices->name("rindices"); + + return rm; + } + +public: + void keep_dims(bool val) { _keep_dims = val; } + void axes(std::vector val) { _axes = val; } + void shape(std::initializer_list val) { _shape = val; } + +public: + luci::CircleReduceMax *rm = nullptr; + luci::CircleConst *rindices = nullptr; + +private: + bool _keep_dims = true; + std::vector _axes = {2, 3}; + std::initializer_list _shape = {1, 16, 1, 1}; +}; + class ReluGraph final : public SimpleGraph { protected: @@ -690,6 +922,111 @@ public: luci::CircleRsqrt *rsqrt = nullptr; }; +class SoftmaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + softmax = g.nodes()->create(); + softmax->logits(input); + softmax->name("softmax"); + + return softmax; + } + +public: + luci::CircleSoftmax *softmax = nullptr; +}; + +class SplitVGraphlet +{ +public: + SplitVGraphlet() = default; + +public: + void init(loco::Graph *g) + { + // CircleCustom(SplitV) + _splitv = g->nodes()->create(); + _splitv->shape({1, 2, 2, 192}); + _splitv->dtype(loco::DataType::FLOAT32); + _splitv->name("splitv"); + + // CircleConst + 
auto size_splits = g->nodes()->create(); + size_splits->dtype(loco::DataType::S32); + size_splits->shape({3}); + size_splits->size(3); + size_splits->at(0) = 32; + size_splits->at(1) = 32; + size_splits->at(2) = 128; + + // CircleConst + auto split_dim = g->nodes()->create(); + split_dim->dtype(loco::DataType::S32); + split_dim->rank(0); + split_dim->size(1); + split_dim->scalar() = 3; + + _splitv->size_splits(size_splits); + _splitv->split_dim(split_dim); + _splitv->num_split(3); + + // CircleSplitVOut + _splitv_out1 = g->nodes()->create(); + _splitv_out1->shape({1, 2, 2, 32}); + _splitv_out1->dtype(loco::DataType::FLOAT32); + _splitv_out1->index(0); + _splitv_out1->input(_splitv); + _splitv_out1->name("splitv_out1"); + + // CircleSplitVOut + _splitv_out2 = g->nodes()->create(); + _splitv_out2->shape({1, 2, 2, 32}); + _splitv_out2->dtype(loco::DataType::FLOAT32); + _splitv_out2->index(1); + _splitv_out2->input(_splitv); + _splitv_out2->name("splitv_out2"); + + // CircleSplitVOut + _splitv_out3 = g->nodes()->create(); + _splitv_out3->shape({1, 2, 2, 128}); + _splitv_out3->dtype(loco::DataType::FLOAT32); + _splitv_out3->index(2); + _splitv_out3->input(_splitv); + _splitv_out3->name("splitv_out3"); + } + +public: + luci::CircleSplitV *splitv() { return _splitv; } + +protected: + luci::CircleSplitV *_splitv = nullptr; + luci::CircleSplitVOut *_splitv_out1 = nullptr; + luci::CircleSplitVOut *_splitv_out2 = nullptr; + luci::CircleSplitVOut *_splitv_out3 = nullptr; +}; + +class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet +{ +public: + SplitVGraph() = default; + + void init(void) + { + TestIGraphlet::init(g(), {1, 2, 2, 192}); + TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}}); + SplitVGraphlet::init(g()); + + // connect graph + _splitv->input(input()); + + output(0)->from(_splitv_out1); + output(1)->from(_splitv_out2); + output(2)->from(_splitv_out3); + } +}; + class SquaredDifferenceGraph final : public 
SimpleGraph { protected: @@ -929,8 +1266,11 @@ TEST(ConvertNCHWToNHWC, AddScalar) auto new_beta = dynamic_cast(g.add->y()); EXPECT_NE(nullptr, new_beta); - EXPECT_EQ(1, new_beta->rank()); + EXPECT_EQ(4, new_beta->rank()); EXPECT_EQ(1, new_beta->dim(0).value()); + EXPECT_EQ(1, new_beta->dim(1).value()); + EXPECT_EQ(1, new_beta->dim(2).value()); + EXPECT_EQ(1, new_beta->dim(3).value()); check_pre_trans(g.output->from()); } @@ -1017,6 +1357,26 @@ TEST(ConvertNCHWToNHWC, Logistic) EXPECT_EQ(16, g.logistic->dim(3).value()); } +TEST(ConvertNCHWToNHWC, LogSoftmax) +{ + LogSoftmaxGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.log_softmax->logits()); + + auto log_softmax_succs = loco::succs(g.log_softmax); + EXPECT_EQ(1, log_softmax_succs.size()); + check_post_trans(*log_softmax_succs.begin()); + + // Check log_softmax shape + EXPECT_EQ(1, g.log_softmax->dim(0).value()); + EXPECT_EQ(4, g.log_softmax->dim(1).value()); + EXPECT_EQ(4, g.log_softmax->dim(2).value()); + EXPECT_EQ(16, g.log_softmax->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, Maximum) { MaximumGraph g; @@ -1265,8 +1625,11 @@ TEST(ConvertNCHWToNHWC, MulScalar) auto new_multiplier = dynamic_cast(g.mul->y()); EXPECT_NE(nullptr, new_multiplier); - EXPECT_EQ(1, new_multiplier->rank()); + EXPECT_EQ(4, new_multiplier->rank()); EXPECT_EQ(1, new_multiplier->dim(0).value()); + EXPECT_EQ(1, new_multiplier->dim(1).value()); + EXPECT_EQ(1, new_multiplier->dim(2).value()); + EXPECT_EQ(1, new_multiplier->dim(3).value()); check_pre_trans(g.output->from()); } @@ -1451,6 +1814,85 @@ TEST(ConvertNCHWToNHWC, Preserve_Input_Output) } } +TEST(ConvertNCHWToNHWC, ReduceMax) +{ + ReduceMaxGraph g; + g.init(); + + run_phase(&g.g, false, false); + + check_pre_trans(g.rm->input()); + + auto rm_succs = loco::succs(g.rm); + EXPECT_EQ(1, rm_succs.size()); + check_post_trans(*rm_succs.begin()); + + auto new_rindices = dynamic_cast(g.rm->reduction_indices()); + EXPECT_NE(nullptr, new_rindices); + EXPECT_EQ(1, 
new_rindices->rank()); + EXPECT_EQ(2, new_rindices->dim(0).value()); + EXPECT_EQ(2, new_rindices->size()); + EXPECT_EQ(1, new_rindices->at(0)); + EXPECT_EQ(2, new_rindices->at(1)); +} + +TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false) +{ + struct TC + { + std::vector nchw_ind; + std::vector nhwc_ind; + std::initializer_list shape; + bool needs_transpose = false; + }; + + uint32_t n = 1; + uint32_t c = 16; + uint32_t h = 4; + uint32_t w = 4; + + std::vector test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false}, + {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true}, + {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true}, + {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false}, + {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false}, + {{0, 1, 2}, {0, 3, 1}, {w}, false}}; + + for (auto &tc : test_cases) + { + ReduceMaxGraph g; + g.keep_dims(false); + g.axes(tc.nchw_ind); + g.shape(tc.shape); + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.rm->input()); + + auto rm_succs = loco::succs(g.rm); + EXPECT_EQ(1, rm_succs.size()); + if (tc.needs_transpose) + { + EXPECT_NE(nullptr, dynamic_cast(*rm_succs.begin())); + } + else + { + EXPECT_NE(nullptr, dynamic_cast(*rm_succs.begin())); + } + + auto new_rindices = dynamic_cast(g.rm->reduction_indices()); + EXPECT_NE(nullptr, new_rindices); + EXPECT_EQ(1, new_rindices->rank()); + EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value()); + EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size()); + for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i) + { + EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at(i)); + } + } +} + TEST(ConvertNCHWToNHWC, Relu) { ReluGraph g; @@ -1511,6 +1953,57 @@ TEST(ConvertNCHWToNHWC, Rsqrt) EXPECT_EQ(16, g.rsqrt->dim(3).value()); } +TEST(ConvertNCHWToNHWC, Softmax) +{ + SoftmaxGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.softmax->logits()); + + auto softmax_succs = loco::succs(g.softmax); + EXPECT_EQ(1, 
softmax_succs.size()); + check_post_trans(*softmax_succs.begin()); + + // Check softmax shape + EXPECT_EQ(1, g.softmax->dim(0).value()); + EXPECT_EQ(4, g.softmax->dim(1).value()); + EXPECT_EQ(4, g.softmax->dim(2).value()); + EXPECT_EQ(16, g.softmax->dim(3).value()); +} + +TEST(ConvertNCHWToNHWC, SplitV) +{ + SplitVGraph g; + g.init(); + + run_phase(g.g(), true, true); + + check_pre_trans(g.splitv()->input()); + + auto splitv_succs = loco::succs(g.splitv()); + for (auto svo : loco::succs(g.splitv())) + { + for (auto succ : loco::succs(svo)) + { + check_post_trans(succ); + } + } + + // Check splitv() shape + EXPECT_EQ(1, g.splitv()->dim(0).value()); + EXPECT_EQ(2, g.splitv()->dim(1).value()); + EXPECT_EQ(192, g.splitv()->dim(2).value()); + EXPECT_EQ(2, g.splitv()->dim(3).value()); + + // Check axis + auto axis = dynamic_cast(g.splitv()->split_dim()); + EXPECT_NE(nullptr, axis); + EXPECT_EQ(1, axis->size()); + EXPECT_EQ(2, axis->at(0)); +} + TEST(ConvertNCHWToNHWC, SquaredDifference) { SquaredDifferenceGraph g; @@ -1602,3 +2095,31 @@ TEST(ConvertNCHWToNHWC, SubScalar) check_pre_trans(g.output->from()); } + +TEST(ConvertNCHWToNHWC, Not_Closed_Case1_NEG) +{ + NoPostReshapeGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.relu->features()); + + auto relu_succs = loco::succs(g.relu); + EXPECT_EQ(1, relu_succs.size()); + check_post_trans(*relu_succs.begin()); +} + +TEST(ConvertNCHWToNHWC, Not_Closed_Case2_NEG) +{ + ReluNotClosedGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.relu->features()); + + auto relu_succs = loco::succs(g.relu); + EXPECT_EQ(1, relu_succs.size()); + check_post_trans(*relu_succs.begin()); +} diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp index 11970ff..72f5901 100644 --- a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp +++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp @@ -184,8 +184,63 
@@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor // For non-const activation, insert Quantize-Dequantize Ops // and dequantize the node - void visit(luci::CircleConv2D *node) { fq_activation(node); } void visit(luci::CircleAdd *node) { fq_activation(node); } + void visit(luci::CircleAveragePool2D *node) { fq_activation(node); } + void visit(luci::CircleBatchMatMul *node) { fq_activation(node); } + void visit(luci::CircleConv2D *node) { fq_activation(node); } + void visit(luci::CircleDepthwiseConv2D *node) { fq_activation(node); } + void visit(luci::CircleDiv *node) { fq_activation(node); } + void visit(luci::CircleFullyConnected *node) { fq_activation(node); } + void visit(luci::CircleInstanceNorm *node) { fq_activation(node); } + void visit(luci::CircleLeakyRelu *node) { fq_activation(node); } + void visit(luci::CircleLogistic *node) { fq_activation(node); } + void visit(luci::CircleLogSoftmax *node) { fq_activation(node); } + void visit(luci::CircleMaxPool2D *node) { fq_activation(node); } + void visit(luci::CircleMul *node) { fq_activation(node); } + void visit(luci::CircleNeg *node) { fq_activation(node); } + void visit(luci::CirclePad *node) { fq_activation(node); } + void visit(luci::CirclePRelu *node) { fq_activation(node); } + void visit(luci::CircleMean *node) { fq_activation(node); } + void visit(luci::CircleReduceMax *node) { fq_activation(node); } + void visit(luci::CircleRelu *node) { fq_activation(node); } + void visit(luci::CircleRelu6 *node) { fq_activation(node); } + void visit(luci::CircleResizeBilinear *node) { fq_activation(node); } + void visit(luci::CircleResizeNearestNeighbor *node) { fq_activation(node); } + void visit(luci::CircleRsqrt *node) { fq_activation(node); } + void visit(luci::CircleSoftmax *node) { fq_activation(node); } + void visit(luci::CircleSqrt *node) { fq_activation(node); } + void visit(luci::CircleTanh *node) { fq_activation(node); } + void visit(luci::CircleTransposeConv *node) { fq_activation(node); 
} + + // For Ops that do not change the value of input, do nothing + // (dtype will be automatically updated by type inference) + void visit(luci::CircleCast *) {} + void visit(luci::CircleConcatenation *) {} + void visit(luci::CircleGather *) {} + void visit(luci::CircleSlice *) {} + void visit(luci::CircleStridedSlice *) {} + void visit(luci::CircleReshape *) {} + void visit(luci::CircleSplit *) {} + void visit(luci::CircleSplitOut *) {} + void visit(luci::CircleSplitV *) {} + void visit(luci::CircleSplitVOut *) {} + void visit(luci::CircleTranspose *) {} + + // For Ops that return index, fake quantization is unnecessary + void visit(luci::CircleArgMax *) {} + + // Virtual node + void visit(luci::CircleOutputExclude *) {} + + void visit(luci::CircleQuantize *node) + { + RETURN_UNLESS(is_quant_act(node)); + + insert_dequantize(node); + } + + // Dequantize Op does nothing in fp32 model + void visit(luci::CircleDequantize *) {} }; #undef RETURN_UNLESS diff --git a/compiler/luci/pass/src/FoldDensifyPass.cpp b/compiler/luci/pass/src/FoldDensifyPass.cpp new file mode 100644 index 0000000..5ddc743 --- /dev/null +++ b/compiler/luci/pass/src/FoldDensifyPass.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FoldDensifyPass.h" +#include "helpers/SparsityFormatConverter.h" + +#include +#include + +#include +#include + +namespace +{ + +bool is_foldable_const(luci::CircleConst *node) +{ + if (node->sparsityparam() == nullptr) + return false; + + if (node->dtype() == loco::DataType::FLOAT32) + return true; + if (node->dtype() == loco::DataType::FLOAT16) + return true; + + return false; +} + +luci::CircleConst *densified_const_node(luci::CircleConst *const_node) +{ + assert(const_node->sparsityparam()); + + auto name = const_node->name(); + assert(name.length() > 0); + auto g = const_node->graph(); + auto new_const_node = g->nodes()->create(); + + new_const_node->dtype(const_node->dtype()); + new_const_node->rank(const_node->rank()); + + uint32_t dim_size = 1; + std::vector dense_shape; + for (uint32_t i = 0; i < new_const_node->rank(); ++i) + { + assert(const_node->dim(i).known()); + new_const_node->dim(i) = const_node->dim(i); + + uint32_t value = const_node->dim(i).value(); + dim_size *= value; + dense_shape.emplace_back(static_cast(value)); + } + + if (const_node->dtype() == loco::DataType::FLOAT32) + new_const_node->size(dim_size); + else + { + assert(const_node->dtype() == loco::DataType::FLOAT16); + new_const_node->size(dim_size); + } + + new_const_node->shape_status(luci::ShapeStatus::VALID); + new_const_node->name(name + "_DS"); + + if (const_node->dtype() == loco::DataType::FLOAT32) + { + auto const_items = const_node->size(); + auto f_data = std::make_unique(const_items); + for (size_t i = 0; i < const_items; ++i) + f_data[i] = const_node->at(i); + + sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam()); + sparsity::FormatConverter converter(dense_shape, sp); + converter.SparseToDense(f_data.get()); + const auto &data_dense = converter.GetData(); + assert(data_dense.size() == dim_size); + + for (uint32_t i = 0; i < dim_size; ++i) + new_const_node->at(i) = data_dense[i]; + + luci::freeTfLiteSparsity(sp); + } + 
else + { + assert(const_node->dtype() == loco::DataType::FLOAT16); + + auto const_items = const_node->size(); + auto f_data = std::make_unique(const_items); + for (size_t i = 0; i < const_items; ++i) + f_data[i] = const_node->at(i); + + // Primitive type for FLOAT16 is UINT16 + sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam()); + sparsity::FormatConverter converter(dense_shape, sp); + converter.SparseToDense(f_data.get()); + const auto &data_dense = converter.GetData(); + assert(data_dense.size() == dim_size); + for (uint32_t i = 0; i < dim_size; ++i) + new_const_node->at(i) = data_dense[i]; + + luci::freeTfLiteSparsity(sp); + } + + return new_const_node; +} + +/** + * @brief Fold Densify if input is Sparse Constant + */ +bool fold_densify(luci::CircleDensify *densify) +{ + auto const_input = dynamic_cast(densify->input()); + if (not const_input) + return false; + + if (not is_foldable_const(const_input)) + return false; + + auto dense_const = densified_const_node(const_input); + assert(dense_const); + + loco::replace(densify).with(dense_const); + luci::add_origin(dense_const, luci::composite_origin( + {luci::get_origin(densify), luci::get_origin(const_input)})); + + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * + * [CircleConst](sparse) + * | + * [CircleDensify] + * | + * [CircleNode] + * | + * + * AFTER + * + * [CircleConst](dense) [CircleConst](sparse) + * | | + * [CircleNode] [CircleDensify] + * | + */ +bool FoldDensifyPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto densify = dynamic_cast(node)) + { + if (fold_densify(densify)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FoldDensifyPass.test.cpp b/compiler/luci/pass/src/FoldDensifyPass.test.cpp new file mode 100644 index 0000000..2f9736f --- /dev/null +++ b/compiler/luci/pass/src/FoldDensifyPass.test.cpp @@ 
-0,0 +1,158 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FoldDensifyPass.h" +#include "PassTestGraphs.h" + +#include + +#include + +namespace +{ + +class FoldDensifyPassGraph : public luci::ConstantFoldingAddTestGraph +{ +public: + FoldDensifyPassGraph(std::initializer_list shape) + : luci::ConstantFoldingAddTestGraph(shape, loco::DataType::FLOAT32) + { + _densify = _g.nodes()->create(); + _x = _g.nodes()->create(); + + _densify->dtype(loco::DataType::FLOAT32); + _x->dtype(loco::DataType::FLOAT32); + + _densify->shape(shape); + _x->shape(shape); + + _densify->input(_x); + + _densify->name("densify"); + _x->name("x"); + } + + loco::Node *createFoldedPattern() override { return _densify; } + +public: + void fill_const_dense(void) + { + uint32_t num_elems = 1; + for (uint32_t r = 0; r < _x->rank(); ++r) + num_elems *= _x->dim(r).value(); + + _x->size(num_elems); + for (uint32_t i = 0; i < num_elems; i++) + _x->at(i) = static_cast(i + 1); + } + + void fill_const_sparse(void) + { + // fill 4x4 of + // [[1 0 0 0] + // [0 2 0 0] + // [0 0 3 0] + // [0 0 0 4]] + + // values of 1.0, 2.0, 3.0, 4.0 + uint32_t udata[] = {0x3f800000, 0x40000000, 0x40400000, 0x40800000}; + float *fdata = reinterpret_cast(udata); + + _x->size(4); + for (uint32_t i = 0; i < 4; i++) + _x->at(i) = fdata[i]; + + auto sparsityparam = std::make_unique(); + 
sparsityparam->traversal_order = std::vector({0, 1}); + sparsityparam->block_map = std::vector({}); + + auto dm0 = luci::DimMetaData(luci::DimensionType::DENSE, 4); + + std::vector as_vec = {0, 1, 2, 3, 4}; + std::vector ai_vec = {0, 1, 2, 3}; + auto as = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, as_vec); + auto ai = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, ai_vec); + auto dm1 = luci::DimMetaData(luci::DimensionType::SPARSE_CSR, 0, as, ai); + sparsityparam->dim_metadata.emplace_back(dm0); + sparsityparam->dim_metadata.emplace_back(dm1); + + _x->sparsityparam(std::move(sparsityparam)); + } + +protected: + luci::CircleDensify *_densify = nullptr; + luci::CircleConst *_x = nullptr; +}; + +class FoldDensifyPassGraphTest : public FoldDensifyPassGraph, public ::testing::Test +{ +public: + FoldDensifyPassGraphTest() : FoldDensifyPassGraph({4, 4}) {} + + virtual void SetUp() { init(); } +}; + +} // namespace + +TEST(FoldDensifyPassGraph, name) +{ + luci::FoldDensifyPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST_F(FoldDensifyPassGraphTest, no_sparsity_param_NEG) +{ + fill_const_dense(); + + luci::FoldDensifyPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(FoldDensifyPassGraphTest, sparsity_param) +{ + fill_const_sparse(); + + luci::FoldDensifyPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + EXPECT_EQ(2, folded_const->rank()); + EXPECT_EQ(4, folded_const->dim(0).value()); + EXPECT_EQ(4, folded_const->dim(1).value()); + EXPECT_EQ(16, folded_const->size()); + for (int y = 0; y < 4; ++y) + { + for (int x = 0; x < 4; ++x) + { + float ovalue = folded_const->at(y * 4 + x); + float fvalue = 0.0; + if (x == y) + { + // diagonal position + fvalue = static_cast(y + 1); + } + EXPECT_EQ(fvalue, ovalue); + } + } +} diff --git 
a/compiler/luci/pass/src/FoldDequantizePass.cpp b/compiler/luci/pass/src/FoldDequantizePass.cpp index 3dd4f8c..b6526de 100644 --- a/compiler/luci/pass/src/FoldDequantizePass.cpp +++ b/compiler/luci/pass/src/FoldDequantizePass.cpp @@ -19,6 +19,8 @@ #include #include +#include + namespace { @@ -32,6 +34,9 @@ bool is_hybrid_kernel_supported(loco::Node *node) bool is_foldable_const(luci::CircleConst *node) { + if (node->dtype() == loco::DataType::FLOAT16) + return true; + if (node->quantparam() == nullptr) return false; @@ -39,17 +44,18 @@ bool is_foldable_const(luci::CircleConst *node) return true; if (node->dtype() == loco::DataType::U8) return true; + if (node->dtype() == loco::DataType::S16) + return true; + if (node->dtype() == loco::DataType::S32) + return true; + if (node->dtype() == loco::DataType::S64) + return true; return false; } luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node) { - if (const_node->quantparam() == nullptr) - { - throw std::runtime_error("Given constant node has no quantization parameter"); - } - auto name = const_node->name(); assert(name.length() > 0); auto g = const_node->graph(); @@ -67,38 +73,70 @@ luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node) new_const_node->shape_status(luci::ShapeStatus::VALID); new_const_node->name(name + "_DQ"); + if (const_node->dtype() == loco::DataType::FLOAT16) + { + for (uint32_t i = 0; i < new_const_node->size(); ++i) + { + auto raw = const_node->at(i); + new_const_node->at(i) = fp16_ieee_to_fp32_value(raw); + } + return new_const_node; + } + + if (const_node->quantparam() == nullptr) + { + throw std::runtime_error("Given constant node has no quantization parameter"); + } + const int32_t q_dim = const_node->quantparam()->quantized_dimension; - const int32_t q_dim_value = const_node->dim(q_dim).value(); + // For scalar, q_dim_value is 1 + // For non-scalar, q_dim_value is the size of quantized dimension + const int32_t q_dim_value = const_node->rank() == 0 ? 
1 : const_node->dim(q_dim).value(); int32_t right_count = q_dim_value; for (uint32_t i = q_dim + 1; i < const_node->rank(); ++i) right_count *= const_node->dim(i).value(); - if (const_node->dtype() == loco::DataType::S8) + for (uint32_t i = 0; i < new_const_node->size(); ++i) { - for (uint32_t i = 0; i < const_node->size(); ++i) - { - uint32_t qd = (i % right_count) / (right_count / q_dim_value); - if (qd >= const_node->quantparam()->zerop.size()) - qd = 0; + uint32_t qd = (i % right_count) / (right_count / q_dim_value); + if (qd >= const_node->quantparam()->zerop.size()) + qd = 0; - new_const_node->at(i) = - (float)(const_node->at(i) - const_node->quantparam()->zerop.at(qd)) * - const_node->quantparam()->scale.at(qd); - } - } - else - { - for (uint32_t i = 0; i < const_node->size(); ++i) + switch (const_node->dtype()) { - uint32_t qd = (i % right_count) / (right_count / q_dim_value); - if (qd >= const_node->quantparam()->zerop.size()) - qd = 0; - - new_const_node->at(i) = - (float)((int)const_node->at(i) - - const_node->quantparam()->zerop.at(qd)) * - const_node->quantparam()->scale.at(qd); + case loco::DataType::S8: + new_const_node->at(i) = + static_cast(const_node->at(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S16: + new_const_node->at(i) = + static_cast(const_node->at(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S32: + new_const_node->at(i) = + static_cast(const_node->at(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S64: + new_const_node->at(i) = + static_cast(const_node->at(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::U8: + new_const_node->at(i) = + static_cast(const_node->at(i) - + const_node->quantparam()->zerop.at(qd)) * + 
const_node->quantparam()->scale.at(qd); + break; + default: + throw std::runtime_error("Not supported dtype for FoldDequantizePass"); } } @@ -160,7 +198,7 @@ bool FoldDequantizePass::run(loco::Graph *g) { bool changed = false; - for (auto node : loco::all_nodes(g)) + for (auto node : loco::active_nodes(loco::output_nodes(g))) { if (auto circle_dequant = dynamic_cast(node)) { diff --git a/compiler/luci/pass/src/FoldDequantizePass.test.cpp b/compiler/luci/pass/src/FoldDequantizePass.test.cpp index d82a7bc..fb5b6ad 100644 --- a/compiler/luci/pass/src/FoldDequantizePass.test.cpp +++ b/compiler/luci/pass/src/FoldDequantizePass.test.cpp @@ -15,12 +15,389 @@ */ #include "luci/Pass/FoldDequantizePass.h" +#include "PassTestGraphs.h" #include +namespace +{ + +template +class FoldDequantizeTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test +{ +public: + FoldDequantizeTest() : luci::ConstantFoldingAddTestGraph({2, 2, 2}, DT) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + _dequantize = _g.nodes()->create(); + _input = _g.nodes()->create(); + + _dequantize->dtype(loco::DataType::FLOAT32); + _input->dtype(DT); + + _input->shape({2, 2, 2}); + + _input->size
(8); + _input->at
(0) = 0; + _input->at
(1) = 1; + _input->at
(2) = 2; + _input->at
(3) = 3; + _input->at
(4) = 4; + _input->at
(5) = 5; + _input->at
(6) = 6; + _input->at
(7) = 7; + + auto qparam = std::make_unique(); + qparam->quantized_dimension = 1; + qparam->scale.push_back(5.0); + qparam->scale.push_back(10.0); + qparam->zerop.push_back(1); + qparam->zerop.push_back(2); + _input->quantparam(std::move(qparam)); + + _dequantize->input(_input); + + _dequantize->name("dequantize"); + _input->name("input"); + + return _dequantize; + } + + void createScalarPattern() + { + _input->rank(0); + _input->size
(1); + _input->at
(0) = 1; + + auto qparam = std::make_unique(); + qparam->quantized_dimension = 0; + qparam->scale.push_back(1.0); + qparam->zerop.push_back(0); + _input->quantparam(std::move(qparam)); + } + + void createNotFoldablePattern() { _input->quantparam(nullptr); } + +protected: + luci::CircleDequantize *_dequantize = nullptr; + luci::CircleConst *_input = nullptr; +}; + +class S8FoldDequantizeTest : public FoldDequantizeTest +{ +}; + +class S16FoldDequantizeTest : public FoldDequantizeTest +{ +}; + +class S32FoldDequantizeTest : public FoldDequantizeTest +{ +}; + +class S64FoldDequantizeTest : public FoldDequantizeTest +{ +}; + +class U8FoldDequantizeTest : public FoldDequantizeTest +{ +}; + +class F16FoldDequantizeTest : public luci::ConstantFoldingTestGraph, public ::testing::Test +{ +public: + F16FoldDequantizeTest() : ConstantFoldingTestGraph({2, 2}, loco::DataType::FLOAT16) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + const auto DT = loco::DataType::FLOAT16; + _dequantize = _g.nodes()->create(); + _f16const = _g.nodes()->create(); + + _dequantize->dtype(loco::DataType::FLOAT32); + _f16const->dtype(DT); + + _f16const->shape({2, 2}); + + _f16const->size(4); + _f16const->at
(0) = 49408; // -2.5f + _f16const->at
(1) = 47104; // -0.5f + _f16const->at
(2) = 0; // 0.0f + _f16const->at
(3) = 15872; // 1.5f + // NOTE how to get uint16_t value of float16 ? + // Use compiler/souschef/src/Gaussian.cpp GaussianFloat16DataChef::generate() + // uint16_t value = fp16_ieee_from_fp32_value(-2.5); + // printf("-2.5 = %u\r\n", value); + + _dequantize->input(_f16const); + + _dequantize->name("dequantize"); + _f16const->name("input"); + + _output->from(_dequantize); + + return _dequantize; + } + + void createNotFoldablePattern() { _dequantize->input(_input); } + +protected: + luci::CircleConst *getFoldedPattern() override + { + return dynamic_cast(_output->from()); + } + + void init() override { createFoldedPattern(); } + +protected: + luci::CircleDequantize *_dequantize = nullptr; + luci::CircleConst *_f16const = nullptr; +}; + +} // namespace + TEST(FoldDequantizePassTest, name) { luci::FoldDequantizePass pass; auto const name = pass.name(); ASSERT_NE(nullptr, name); } + +TEST_F(U8FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at(0)); + EXPECT_EQ(0.0, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(10.0, folded_const->at(3)); + EXPECT_EQ(15.0, folded_const->at(4)); + EXPECT_EQ(20.0, folded_const->at(5)); + EXPECT_EQ(40.0, folded_const->at(6)); + EXPECT_EQ(50.0, folded_const->at(7)); +} + +TEST_F(U8FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S8FoldDequantizeTest, fold_dequant_basic) +{ + 
luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at(0)); + EXPECT_EQ(0.0, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(10.0, folded_const->at(3)); + EXPECT_EQ(15.0, folded_const->at(4)); + EXPECT_EQ(20.0, folded_const->at(5)); + EXPECT_EQ(40.0, folded_const->at(6)); + EXPECT_EQ(50.0, folded_const->at(7)); +} + +TEST_F(S8FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S16FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at(0)); + EXPECT_EQ(0.0, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(10.0, folded_const->at(3)); + EXPECT_EQ(15.0, folded_const->at(4)); + EXPECT_EQ(20.0, folded_const->at(5)); + EXPECT_EQ(40.0, folded_const->at(6)); + EXPECT_EQ(50.0, folded_const->at(7)); +} + +TEST_F(S16FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + 
EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S32FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at(0)); + EXPECT_EQ(0.0, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(10.0, folded_const->at(3)); + EXPECT_EQ(15.0, folded_const->at(4)); + EXPECT_EQ(20.0, folded_const->at(5)); + EXPECT_EQ(40.0, folded_const->at(6)); + EXPECT_EQ(50.0, folded_const->at(7)); +} + +TEST_F(S32FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S64FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at(0)); + EXPECT_EQ(0.0, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(10.0, folded_const->at(3)); + EXPECT_EQ(15.0, folded_const->at(4)); + EXPECT_EQ(20.0, folded_const->at(5)); + EXPECT_EQ(40.0, folded_const->at(6)); + EXPECT_EQ(50.0, folded_const->at(7)); +} + +TEST_F(S64FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + 
luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(U8FoldDequantizeTest, fold_dequant_scalar) +{ + createScalarPattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(0, folded_const->rank()); + EXPECT_EQ(1.0, folded_const->at(0)); +} + +TEST_F(F16FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Chec type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(2, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(-2.5, folded_const->at(0)); + EXPECT_EQ(-0.5, folded_const->at(1)); + EXPECT_EQ(0.0, folded_const->at(2)); + EXPECT_EQ(1.5, folded_const->at(3)); +} + +TEST_F(F16FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.cpp index 0c6fc43..ed60d88 100644 --- a/compiler/luci/pass/src/FoldSparseToDensePass.cpp +++ b/compiler/luci/pass/src/FoldSparseToDensePass.cpp @@ -19,6 +19,8 @@ #include +#include + namespace { diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp index 2c990f0..bc09abe 100644 --- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp +++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp @@ -22,6 
+22,7 @@ #include #include #include +#include namespace { @@ -55,6 +56,26 @@ void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape) new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r); } +luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape) +{ + assert(reshape != nullptr); // FIX_CALLER_UNLESS + + luci::CircleConst *cloned_shape = clone_shape(reshape); + if (cloned_shape == nullptr) + return nullptr; + + auto cloned_node = luci::clone_node(reshape, reshape->graph()); + if (cloned_node == nullptr) + return nullptr; + + auto new_reshape = loco::must_cast(cloned_node); + new_reshape->shape(cloned_shape); + new_reshape->name(reshape->name() + "_C"); + luci::add_origin(new_reshape, luci::get_origin(reshape)); + + return new_reshape; +} + bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg) { assert(reshape != nullptr); @@ -85,6 +106,26 @@ bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg) return true; } +bool forward_reshape(luci::CircleReshape *reshape, luci::CircleLogistic *logit) +{ + assert(reshape != nullptr); // FIX_CALLER_UNLESS + assert(logit != nullptr); // FIX_CALLER_UNLESS + + auto new_reshape = create_cloned_reshape(reshape); + if (not new_reshape) + return false; + + // reconnect network + loco::replace(logit).with(new_reshape); + logit->x(reshape->tensor()); + new_reshape->tensor(logit); + + // Do shape inference for this node again. 
+ logit->shape_status(luci::ShapeStatus::UNDEFINED); + + return true; +} + class ForwardReshape final : public luci::CircleNodeMutableVisitor { protected: @@ -103,6 +144,14 @@ protected: return forward_reshape(reshape, node); } + bool visit(luci::CircleLogistic *node) + { + auto reshape = as_reshape(node->x()); + if (reshape == nullptr) + return false; + + return forward_reshape(reshape, node); + } // TODO add more unary operators }; diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp index 2593a01..3735132 100644 --- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp +++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp @@ -65,6 +65,42 @@ protected: luci::CircleConst *_reshape_shape = nullptr; }; +// TODO Reduce duplicate code with ReshapeNegGraphlet +class ReshapeLogisticGraphlet +{ +public: + ReshapeLogisticGraphlet() = default; + +public: + void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out) + { + std::vector shape_out_v = shape_out; + + _reshape_shape = g->nodes()->create(); + _reshape = g->nodes()->create(); + _logistic = g->nodes()->create(); + + _reshape_shape->dtype(loco::DataType::S32); + _reshape_shape->rank(1); + _reshape_shape->dim(0).set(shape_out_v.size()); + _reshape_shape->shape_status(luci::ShapeStatus::VALID); + // values + const auto size = shape_out_v.size(); + _reshape_shape->size(size); + for (uint32_t i = 0; i < size; i++) + _reshape_shape->at(i) = shape_out_v[i]; + + _reshape_shape->name("reshape_shape"); + _reshape->name("reshape"); + _logistic->name("logistic"); + } + +protected: + luci::CircleReshape *_reshape = nullptr; + luci::CircleLogistic *_logistic = nullptr; + luci::CircleConst *_reshape_shape = nullptr; +}; + class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet { public: @@ -85,6 +121,26 @@ public: } }; +class ForwardReshapeToLogisticGraph : public TestIOGraph, public 
ReshapeLogisticGraphlet +{ +public: + ForwardReshapeToLogisticGraph() = default; + +public: + void init(const ShapeU32 shape_in, const ShapeU32 shape_out) + { + TestIOGraph::init(shape_in, shape_out); + ReshapeLogisticGraphlet::init(g(), shape_in, shape_out); + + // connect network + _reshape->tensor(input()); + _reshape->shape(_reshape_shape); + _logistic->x(_reshape); + + output()->from(_logistic); + } +}; + class ForwardReshapeToNegGraphTest : public ::testing::Test { public: @@ -101,6 +157,22 @@ protected: luci::ForwardReshapeToUnaryOpPass _pass; }; +class ForwardReshapeToLogisticGraphTest : public ::testing::Test +{ +public: + ForwardReshapeToLogisticGraphTest() = default; + + void run_pass(void) + { + while (_pass.run(_graph.g())) + ; + } + +protected: + ForwardReshapeToLogisticGraph _graph; + luci::ForwardReshapeToUnaryOpPass _pass; +}; + } // namespace TEST(ForwardReshapeToUnaryOpPassTest, name) @@ -123,3 +195,17 @@ TEST_F(ForwardReshapeToNegGraphTest, simple_forward) neg = dynamic_cast(reshape->tensor()); ASSERT_NE(nullptr, neg); } + +TEST_F(ForwardReshapeToLogisticGraphTest, forward) +{ + _graph.init({2, 2, 2}, {2, 4}); + + run_pass(); + + auto reshape = dynamic_cast(_graph.output()->from()); + auto log = dynamic_cast(_graph.output()->from()); + ASSERT_NE(nullptr, reshape); + ASSERT_EQ(nullptr, log); + log = dynamic_cast(reshape->tensor()); + ASSERT_NE(nullptr, log); +} diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp index 97a962c..3cf31ed 100644 --- a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp +++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp @@ -99,6 +99,12 @@ bool fuse_add_with_fc(luci::CircleFullyConnected *fc) fused_bias->at(i) += const_bias->at(i); } + // At this point, it is guarateed that fused_bias's shape is [1, 1, ..., N] or [N] + // where N is weights->dim(0). 
+ // The shape is normalized to [N] to become the bias of FC + fused_bias->rank(1); + fused_bias->dim(0) = weights->dim(0); + fc->bias(fused_bias); fc->fusedActivationFunction(add->fusedActivationFunction()); diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp index 2bca570..852bc8b 100644 --- a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp +++ b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp @@ -37,10 +37,10 @@ namespace * \ | * [CircleTransposeConv] [CircleAdd] * | - * ([CircleRelu6]) + * ([CircleRelu/Relu6]) * | * - * Note: CircleRelu6 is inserted if Add activation is ReLU6 + * Note: CircleRelu/Relu6 is inserted if Add activation is ReLU6 */ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) { @@ -65,7 +65,8 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) if (add->dtype() != loco::DataType::FLOAT32) return false; if (add->fusedActivationFunction() != luci::FusedActFunc::NONE && - add->fusedActivationFunction() != luci::FusedActFunc::RELU6) + add->fusedActivationFunction() != luci::FusedActFunc::RELU6 && + add->fusedActivationFunction() != luci::FusedActFunc::RELU) return false; // get addition @@ -102,6 +103,19 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) // remove add node replace(add).with(relu); } + else if (add->fusedActivationFunction() == luci::FusedActFunc::RELU) + { + auto name = addition->name(); + assert(name.length() > 0); + // separate relu op from add op + auto relu = add->graph()->nodes()->create(); + relu->features(tconv); + relu->name(name + "/Relu"); + luci::add_origin(relu, luci::get_origin(add)); + + // remove add node + replace(add).with(relu); + } else { replace(add).with(tconv); diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp index 3379549..e6b54df 100644 --- a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp +++ 
b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp @@ -29,7 +29,7 @@ namespace * NOTE TF's BatchNormalization is converted to Mul and Add. * * BEFORE - * | [CircleOutputExclude] + * | [CircleConst]/[CircleOutputExclude] * | / [CircleConst] * | / / * [CircleTransposeConv] [CircleConst] @@ -40,7 +40,7 @@ namespace * | * * AFTER - * | [CircleOutputExclude] + * | [CircleConst]/[CircleOutputExclude] * +-------------------------------------+ / [CircleConst] * | | / / * | [CircleTransposeConv] [CircleConst] @@ -69,9 +69,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) return false; // check scale and shift constant attributes - if (scale->rank() != 1) + // TODO maybe rank check is not needed + if (scale->rank() != 1 && scale->rank() != 4) return false; - if (shift->rank() != 1) + if (shift->rank() != 1 && shift->rank() != 4) return false; // check mul, add attributes if (mul->dtype() != loco::DataType::FLOAT32) @@ -82,9 +83,8 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) add->fusedActivationFunction() != luci::FusedActFunc::RELU6) return false; - // tconv bias should be not set - if (not dynamic_cast(tconv->bias())) - return false; + // tconv bias is optional + auto bias = dynamic_cast(tconv->bias()); // get weight of tconv auto filter = dynamic_cast(tconv->filter()); @@ -96,10 +96,36 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) return false; auto filter_out_chn = filter->dim(0).value(); - if (filter_out_chn != scale->dim(0).value()) + // allow scale/shift and bias shape of [N], [1,1,1,N]; BN works for "channel-wise" + auto srank = scale->rank() - 1; + if (filter_out_chn != scale->dim(srank).value()) return false; - if (filter_out_chn != shift->dim(0).value()) + for (uint32_t d = 0; d < srank; ++d) + { + if (1 != scale->dim(d).value()) + return false; + } + srank = shift->rank() - 1; + if (filter_out_chn != shift->dim(srank).value()) return false; + for (uint32_t d = 0; d < srank; ++d) + { + if (1 != shift->dim(d).value()) + 
return false; + } + if (bias) + { + if (bias->dtype() != loco::DataType::FLOAT32) + return false; + srank = bias->rank() - 1; + if (filter_out_chn != bias->dim(srank).value()) + return false; + for (uint32_t d = 0; d < srank; ++d) + { + if (1 != bias->dim(d).value()) + return false; + } + } auto name = add->name(); assert(name.length() > 0); @@ -151,6 +177,11 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) for (uint32_t c = 0; c < filter_out_chn; ++c) { fused_bias->at(c) = shift->at(c); + if (bias != nullptr) + { + fused_bias->at(c) += + bias->at(c) * scale->at(c); + } } fused_bias->name(name + "/TransposeConv/bias"); @@ -166,6 +197,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) luci::add_origin(fused_tconv, luci::composite_origin( {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)})); + if (bias != nullptr) + { + luci::add_origin(fused_tconv, luci::get_origin(bias)); + } if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6) { diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp index f3ec6cd..10a651e 100644 --- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp +++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp @@ -325,6 +325,10 @@ public: } private: + bool condition_common_1_5(uint32_t ifm_channel_depth); + bool condition_common_3_4(); + +private: template bool match(); public: @@ -368,21 +372,8 @@ private: if (not(condition)) \ return false; -template <> bool InstanceNormPattern::match() +bool InstanceNormPattern::condition_common_1_5(uint32_t ifm_channel_depth) { - CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal)); - CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm)); - - auto ifm_circle = loco::must_cast(ifm); - CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID); - CHECK_OR_FALSE(ifm_circle->rank() == 4); - 
CHECK_OR_FALSE(ifm_circle->dim(3).known()); - uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); - - CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma)); - - CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth)); - add_as_variance = dynamic_cast(rsqrt->x()); CHECK_OR_FALSE(add_as_variance); @@ -408,6 +399,70 @@ template <> bool InstanceNormPattern::matchx(); + CHECK_OR_FALSE(ifm); + + luci::CircleNode *ifm_node = loco::must_cast(ifm); + CHECK_OR_FALSE(ifm_node->rank() == 4); + CHECK_OR_FALSE(ifm_node->dim(3).known()); + + mean_of_ifm = dynamic_cast(sub->y()); + CHECK_OR_FALSE(mean_of_ifm); + CHECK_OR_FALSE(ifm == mean_of_ifm->input()); + + // continue search from add_as_variance + CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); + CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); + // TODO Support regarding broadcast + CHECK_OR_FALSE(const_as_epsilon->size() == 1); + + mean_as_variance = dynamic_cast(sqrt->x()); + CHECK_OR_FALSE(mean_as_variance); + + square = dynamic_cast(mean_as_variance->input()); + CHECK_OR_FALSE(square); + + sub_2 = dynamic_cast(square->x()); + CHECK_OR_FALSE(sub_2); + CHECK_OR_FALSE(ifm == sub_2->x()); + + mean_of_ifm_2 = dynamic_cast(sub_2->y()); + CHECK_OR_FALSE(mean_of_ifm_2); + CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); + + loco::Node *ifm_should_be = nullptr; + luci::CircleMean *mean_of_ifm_2_should_be = nullptr; + CHECK_OR_FALSE( + luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); + CHECK_OR_FALSE(ifm == ifm_should_be); + CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + + return true; +} + +template <> bool InstanceNormPattern::match() +{ + CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal)); + CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm)); + + auto ifm_circle = 
loco::must_cast(ifm); + CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID); + CHECK_OR_FALSE(ifm_circle->rank() == 4); + CHECK_OR_FALSE(ifm_circle->dim(3).known()); + uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); + + CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma)); + + CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth)); + + CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth)); + luci::CircleMul *mul_gamma_should_be = nullptr; luci::CircleMean *mean_of_ifm_should_be = nullptr; @@ -488,44 +543,7 @@ template <> bool InstanceNormPattern::matchx(); - CHECK_OR_FALSE(ifm); - - luci::CircleNode *ifm_node = loco::must_cast(ifm); - CHECK_OR_FALSE(ifm_node->rank() == 4); - CHECK_OR_FALSE(ifm_node->dim(3).known()); - - mean_of_ifm = dynamic_cast(sub->y()); - CHECK_OR_FALSE(mean_of_ifm); - CHECK_OR_FALSE(ifm == mean_of_ifm->input()); - - // continue search from add_as_variance - CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - CHECK_OR_FALSE(const_as_epsilon->size() == 1); - - mean_as_variance = dynamic_cast(sqrt->x()); - CHECK_OR_FALSE(mean_as_variance); - - square = dynamic_cast(mean_as_variance->input()); - CHECK_OR_FALSE(square); - - sub_2 = dynamic_cast(square->x()); - CHECK_OR_FALSE(sub_2); - CHECK_OR_FALSE(ifm == sub_2->x()); - - mean_of_ifm_2 = dynamic_cast(sub_2->y()); - CHECK_OR_FALSE(mean_of_ifm_2); - CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); - - loco::Node *ifm_should_be = nullptr; - luci::CircleMean *mean_of_ifm_2_should_be = nullptr; - CHECK_OR_FALSE( - luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + CHECK_OR_FALSE(condition_common_3_4()); _matched = true; return true; @@ 
-546,44 +564,7 @@ template <> bool InstanceNormPattern::matchx(); - CHECK_OR_FALSE(ifm); - - luci::CircleNode *ifm_node = loco::must_cast(ifm); - CHECK_OR_FALSE(ifm_node->rank() == 4); - CHECK_OR_FALSE(ifm_node->dim(3).known()); - - mean_of_ifm = dynamic_cast(sub->y()); - CHECK_OR_FALSE(mean_of_ifm); - CHECK_OR_FALSE(ifm == mean_of_ifm->input()); - - // continue search from add_as_variance - CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - CHECK_OR_FALSE(const_as_epsilon->size() == 1); - - mean_as_variance = dynamic_cast(sqrt->x()); - CHECK_OR_FALSE(mean_as_variance); - - square = dynamic_cast(mean_as_variance->input()); - CHECK_OR_FALSE(square); - - sub_2 = dynamic_cast(square->x()); - CHECK_OR_FALSE(sub_2); - CHECK_OR_FALSE(ifm == sub_2->x()); - - mean_of_ifm_2 = dynamic_cast(sub_2->y()); - CHECK_OR_FALSE(mean_of_ifm_2); - CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); - - loco::Node *ifm_should_be = nullptr; - luci::CircleMean *mean_of_ifm_2_should_be = nullptr; - CHECK_OR_FALSE( - luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + CHECK_OR_FALSE(condition_common_3_4()); assert(const_as_gamma == nullptr); assert(const_as_beta == nullptr); @@ -612,30 +593,7 @@ template <> bool InstanceNormPattern::matchdim(3).known()); uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); - add_as_variance = dynamic_cast(rsqrt->x()); - CHECK_OR_FALSE(add_as_variance); - - CHECK_OR_FALSE( - luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - CHECK_OR_FALSE(const_as_epsilon->size() == 1); - - 
CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance)); - - sqdiff = dynamic_cast(mean_as_variance->input()); - CHECK_OR_FALSE(sqdiff); - - loco::Node *ifm_should_be = nullptr; - CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(is_instance_mean_v1(mean_of_ifm)); - CHECK_OR_FALSE(ifm == mean_of_ifm->input()); - - const_as_beta = dynamic_cast(sub->x()); - CHECK_OR_FALSE(const_as_beta); - CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth)); + CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth)); luci::CircleRsqrt *rsqrt_should_be = nullptr; luci::CircleMean *mean_of_ifm_should_be = nullptr; diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp index b497548..e8fa2a4 100644 --- a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp +++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp @@ -23,6 +23,7 @@ #include #include +#include namespace { diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp index 003e4c2..aaadb28 100644 --- a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp +++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp @@ -138,13 +138,18 @@ struct PropagateQParamForward final : public luci::CircleNodeMutableVisitorquantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype())); + case luci::ActivationQType::PreDefinedLogistic: + case luci::ActivationQType::PreDefinedTanh: + case luci::ActivationQType::PreDefinedSoftmax: + node->quantparam(luci::make_predefined_qparam(qtype, node->dtype())); break; case luci::ActivationQType::IntScale: luci::set_int_scale(node); break; default: + // This assert ensures this switch-satement handles all ActivationQTypes + // TODO Find a better design to remove coupling with ActivationQType + assert(qtype == luci::ActivationQType::MinMax); 
break; } diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp index ad86ced..06a4ae9 100644 --- a/compiler/luci/pass/src/QuantizationUtils.cpp +++ b/compiler/luci/pass/src/QuantizationUtils.cpp @@ -20,6 +20,7 @@ #include #include +#include namespace luci { @@ -276,31 +277,70 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices) indices[2] * dimension.dim(3).value() + indices[3]; } +// Activation (ofm) qtype is determined in different ways. +// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH) +// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL) +// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE, +// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details). ActivationQType activation_qtype(const CircleNode *node) { auto fused_act_node = dynamic_cast *>(node); if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) - return ActivationQType::PreDefinedValue; + return ActivationQType::PreDefinedTanh; + +#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT) \ + { \ + auto n = loco::must_cast(node); \ + auto input = loco::must_cast(n->INPUT()); \ + return activation_qtype(input); \ + } switch (node->opcode()) { case CircleOpcode::LOGISTIC: + return ActivationQType::PreDefinedLogistic; case CircleOpcode::TANH: + return ActivationQType::PreDefinedTanh; case CircleOpcode::SOFTMAX: - return ActivationQType::PreDefinedValue; + return ActivationQType::PreDefinedSoftmax; case CircleOpcode::FLOOR: case CircleOpcode::FLOOR_DIV: case CircleOpcode::FLOOR_MOD: case CircleOpcode::CEIL: return ActivationQType::IntScale; + case CircleOpcode::GATHER: + RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params); + case CircleOpcode::RESHAPE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor); + case CircleOpcode::TRANSPOSE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a); + case 
CircleOpcode::STRIDED_SLICE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input); + case CircleOpcode::SPLIT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input); + case CircleOpcode::CIRCLESPLITOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input); + case CircleOpcode::SPLIT_V: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input); + case CircleOpcode::CIRCLESPLITVOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input); + case CircleOpcode::UNPACK: + RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value); + case CircleOpcode::CIRCLEUNPACKOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input); + case CircleOpcode::QUANTIZE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input); default: break; } +#undef RETURN_INPUT_ACTIVATION_QTYPE + return ActivationQType::MinMax; } -std::unique_ptr make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype) +std::unique_ptr make_predefined_qparam(ActivationQType qtype, + loco::DataType dtype) { auto qparam = std::make_unique(); @@ -309,9 +349,9 @@ std::unique_ptr make_predefined_qparam(CircleOpcode opcode, lo qparam->zerop.emplace_back(zp); }; - switch (opcode) + switch (qtype) { - case CircleOpcode::LOGISTIC: + case ActivationQType::PreDefinedLogistic: if (dtype == loco::DataType::U8) set_qparam(1.0f / 256.0f, 0); else @@ -320,7 +360,7 @@ std::unique_ptr make_predefined_qparam(CircleOpcode opcode, lo set_qparam(1.0f / 32768.0f, 0); } break; - case CircleOpcode::TANH: + case ActivationQType::PreDefinedTanh: if (dtype == loco::DataType::U8) set_qparam(2.0f / 256.0f, 128); else @@ -329,7 +369,7 @@ std::unique_ptr make_predefined_qparam(CircleOpcode opcode, lo set_qparam(1.0f / 32768.0f, 0); } break; - case CircleOpcode::SOFTMAX: + case ActivationQType::PreDefinedSoftmax: if (dtype == loco::DataType::U8) set_qparam(1.0f / 255.0f, 0); else @@ -341,7 +381,7 @@ std::unique_ptr make_predefined_qparam(CircleOpcode opcode, lo default: throw std::runtime_error("Unsupported opcode with pre-defined qparam"); } - 
return std::move(qparam); + return qparam; } // For nodes with integer output, we use integer scale @@ -395,4 +435,74 @@ void quant_const(luci::CircleConst *node, loco::DataType quant_type) node->quantparam(std::move(quantparam)); } +namespace +{ + +// TODO move this to a more global helper file +int nbits(loco::DataType dt) noexcept +{ + switch (dt) + { + case loco::DataType::S8: + case loco::DataType::U8: + return 8; + case loco::DataType::S16: + case loco::DataType::U16: + case loco::DataType::FLOAT16: + return 16; + case loco::DataType::S32: + case loco::DataType::U32: + case loco::DataType::FLOAT32: + return 32; + case loco::DataType::S64: + return 64; + default: + return 64; // a safe large default + } +} + +// TODO Check if the metric is valid +// Returns true if [min,max] is poorly representable +bool range_check(float min, float max, loco::DataType dtype) +{ + float thresh = 1.5f; + return log2f(max) - log2f(min) > nbits(dtype) * thresh; +} + +bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n) +{ + float min, max; + // estimate min/max + switch (n->dtype()) + { + case loco::DataType::U8: + min = scale * (0 - zp); + max = scale * (255 - zp); + break; + case loco::DataType::S16: + min = scale * (-32767); + max = scale * (32767); + break; + default: + return false; + } + return range_check(min, max, n->dtype()); +} + +} // namespace + +void warn_accuracy_with_range(luci::CircleNode *n) +{ + LOGGER(l); + auto qp = n->quantparam(); + auto k = qp->zerop.size(); + for (uint32_t i = 0; i < k; i++) + { + if (warn_scale_zp(qp->scale[i], qp->zerop[i], n)) + WARN(l) << "Quantization of " << i << "-th channel of " << n->name() + << "'s quantization may cause accuracy issues" << std::endl; + ; + } +} + } // namespace luci diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h index cd8cec9..4d5316c 100644 --- a/compiler/luci/pass/src/QuantizationUtils.h +++ b/compiler/luci/pass/src/QuantizationUtils.h @@ -62,15 
+62,19 @@ bool is_quantized(const CircleNode *node); enum ActivationQType { - MinMax, // Quantize using recorded min/max - PreDefinedValue, // Quantize using pre-defined values - IntScale, // Round scale to a positive integer + MinMax, // Quantize using recorded min/max + PreDefinedLogistic, // Quantize using pre-defined values + PreDefinedTanh, // Quantize using pre-defined values + PreDefinedSoftmax, // Quantize using pre-defined values + IntScale, // Round scale to a positive integer }; ActivationQType activation_qtype(const CircleNode *node); // Create qparam with pre-defined values for speical operators -std::unique_ptr make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype); +std::unique_ptr make_predefined_qparam(CircleNode *node, loco::DataType dtype); +std::unique_ptr make_predefined_qparam(ActivationQType qtype, + loco::DataType dtype); // Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil) void set_int_scale(luci::CircleNode *node); @@ -78,6 +82,10 @@ void set_int_scale(luci::CircleNode *node); // Quantize const tensor using its min/max values void quant_const(luci::CircleConst *node, loco::DataType quant_type); +// Check that a node is quantized without significant loss of precision; +// Emits warnings to log with WARN +void warn_accuracy_with_range(luci::CircleNode *n); + } // namespace luci #endif // __LUCI_QUANTIZATION_UTILS_H__ diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp index 1493318..95251a8 100644 --- a/compiler/luci/pass/src/QuantizeActivation.cpp +++ b/compiler/luci/pass/src/QuantizeActivation.cpp @@ -114,29 +114,26 @@ void QuantizeSpecialActivation::visit(luci::CircleNode *node) auto fused_act_node = dynamic_cast *>(node); if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) { - auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + auto qparam = 
make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type); node->quantparam(std::move(qparam)); } } void QuantizeSpecialActivation::visit(luci::CircleLogistic *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedLogistic, output_type); node->quantparam(std::move(qparam)); } void QuantizeSpecialActivation::visit(luci::CircleTanh *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type); node->quantparam(std::move(qparam)); } void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedSoftmax, output_type); node->quantparam(std::move(qparam)); } diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp index aa49623..de97a14 100644 --- a/compiler/luci/pass/src/QuantizeBias.cpp +++ b/compiler/luci/pass/src/QuantizeBias.cpp @@ -22,6 +22,7 @@ #include #include +#include using namespace luci; @@ -201,6 +202,18 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w std::vector scaling_factor(size); std::vector zp(size); + if (const_bias->rank() == 0) + { + // TODO Support quantization of scalar bias + throw std::runtime_error("Quantization of scalar bias is not yet supported (" + + const_bias->name() + ")"); + } + if (size != const_bias->dim(const_bias->rank() - 1).value()) + { + throw std::runtime_error(const_bias->name() + + " (bias) should have the shape of [1, 1, .. 
1, channel]"); + } + if (output_type == loco::DataType::U8) { new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); @@ -218,6 +231,7 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w auto quantparam = std::make_unique(); quantparam->scale = scaling_factor; quantparam->zerop = zp; + quantparam->quantized_dimension = const_bias->rank() - 1; assert(new_bias->quantparam() == nullptr); // bias should not be quantized before new_bias->quantparam(std::move(quantparam)); diff --git a/compiler/luci/pass/src/QuantizeBias.test.cpp b/compiler/luci/pass/src/QuantizeBias.test.cpp new file mode 100644 index 0000000..0104a19 --- /dev/null +++ b/compiler/luci/pass/src/QuantizeBias.test.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizeBias.h" + +#include +#include +#include + +#include + +using namespace luci; + +namespace +{ + +using namespace luci::test; + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector &shape, T value) +{ + auto node = g->nodes()->create(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size
(size); \ + for (uint32_t i = 0; i < size; ++i) \ + node->at
(i) = value; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph for test + * + * BEFORE + * + * [IFM] [WEIGHTS] [BIAS(FP32)] + * \ | / + * [FC] + * | + * [OFM] + * + * AFTER + * + * [IFM] [WEIGHTS] [BIAS(Quantized)] + * \ | / + * [FC] + * | + * [OFM] + */ +struct Q8FCGraphlet +{ +public: + Q8FCGraphlet() = default; + virtual ~Q8FCGraphlet() = default; + + void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape, + const ShapeU32 bias_shape, const float bv) + { + _fc = g->nodes()->create(); + _fc->input(_x); + _x->dtype(loco::DataType::U8); + { + auto quantparam = std::make_unique(); + quantparam->scale.push_back(1.0); + quantparam->zerop.push_back(0); + quantparam->quantized_dimension = 0; + _x->quantparam(std::move(quantparam)); + } + + auto weights = create_const_node(g, loco::DataType::U8, w_shape, 1.0); + auto w_qparam = std::make_unique(); + std::vector w_scale(weights->dim(0).value(), 1.0); + std::vector w_zp(weights->dim(0).value(), 0); + w_qparam->scale = w_scale; + w_qparam->zerop = w_zp; + w_qparam->quantized_dimension = 0; + weights->quantparam(std::move(w_qparam)); + _fc->weights(weights); + _fc->fusedActivationFunction(luci::FusedActFunc::NONE); + _fc->dtype(loco::DataType::U8); + _fc->shape(out_shape); + auto l = _fc->dim(_fc->rank() - 1).value(); + _fc->bias(create_const_node(g, loco::DataType::FLOAT32, bias_shape, bv)); + _fc->name("fc"); + { + auto quantparam = std::make_unique(); + quantparam->scale.push_back(1.0); + quantparam->zerop.push_back(0); + quantparam->quantized_dimension = 0; + _fc->quantparam(std::move(quantparam)); + 
} + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleInput *_x = nullptr; +}; + +struct Q8FCGraph final : public TestIGraphlet, public TestOGraphlet, public Q8FCGraphlet +{ + void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape, + const ShapeU32 bias_shape, const float bv) + { + TestIGraphlet::init(g(), in_shape); + TestOGraphlet::init(g(), out_shape); + _x = input(); + Q8FCGraphlet::init(g(), out_shape, w_shape, bias_shape, bv); + output()->from(_fc); + } +}; + +class CQ8QuantizeBiasFCTest : public ::testing::Test +{ +public: + Q8FCGraph g; + luci::QuantizeBias qb{loco::DataType::FLOAT32, loco::DataType::U8, + luci::QuantizationGranularity::ChannelWise}; +}; + +} // namespace + +TEST_F(CQ8QuantizeBiasFCTest, fully_connected) +{ + g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 256}, 1); + g.fc()->accept(&qb); + + auto bias = loco::must_cast(g.fc()->bias()); + auto qparam = bias->quantparam(); + + EXPECT_NE(nullptr, qparam); + EXPECT_EQ(256, qparam->scale.size()); + EXPECT_EQ(256, qparam->zerop.size()); + EXPECT_EQ(1, qparam->quantized_dimension); +} + +TEST_F(CQ8QuantizeBiasFCTest, wrong_bias_shape_NEG) +{ + g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 2, 128}, 1); + EXPECT_ANY_THROW(g.fc()->accept(&qb)); // Wrong bias shape +} diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp index c9b35e0..ef047d3 100644 --- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp +++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace { @@ -352,15 +353,15 @@ private: private: // Check if // 1. node is const - // 2. node was not quantized + // 2. 
node's dtype is float32 bool is_quantizable(loco::Node *node) { auto const_node = dynamic_cast(node); if (not const_node) return false; - // Skip if this is already quantized - if (is_quantized(const_node)) + // Skip if this is not float32 + if (const_node->dtype() != loco::DataType::FLOAT32) return false; return true; diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp index 11322ab..500ae12 100644 --- a/compiler/luci/pass/src/QuantizeWeights.cpp +++ b/compiler/luci/pass/src/QuantizeWeights.cpp @@ -23,6 +23,7 @@ #include #include #include +#include using namespace luci; diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index d9a9d4d..0051445 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -41,10 +41,28 @@ namespace { using namespace luci; + +bool use_predefined_values(ActivationQType qtype) +{ + switch (qtype) + { + case ActivationQType::PreDefinedLogistic: + case ActivationQType::PreDefinedTanh: + case ActivationQType::PreDefinedSoftmax: + return true; + default: + // This ensures this switch-statement handles all ActivationQTypes + assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax); + break; + } + + return false; +} + // Create a Quantize Op whose // dtype is out_type // shape is the same with node -// qparam is computed using node's min/max +// qparam is computed according to node's qtype luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type) { auto quantize = node->graph()->nodes()->create(); @@ -60,9 +78,9 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType assert(qparam); // FIX_CALLER_UNLESS auto qtype = luci::activation_qtype(node); - if (qtype == ActivationQType::PreDefinedValue) + if (use_predefined_values(qtype)) { - 
quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type)); + quantize->quantparam(luci::make_predefined_qparam(qtype, out_type)); return quantize; } @@ -105,6 +123,23 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType return quantize; } +// Create Dequantize Op whose shape is the same with node +luci::CircleDequantize *create_dequantize(luci::CircleNode *node) +{ + auto dequantize = node->graph()->nodes()->create(); + dequantize->name(node->name() + "_Dequantize"); + dequantize->dtype(loco::DataType::FLOAT32); + dequantize->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + dequantize->dim(i).set(node->dim(i).value()); + + dequantize->shape_status(luci::ShapeStatus::VALID); + + luci::add_origin(dequantize, luci::get_origin(node)); + + return dequantize; +} + } // namespace namespace luci @@ -229,11 +264,13 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input) @@ -241,6 +278,7 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor) 
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input) @@ -250,6 +288,7 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input) @@ -353,7 +392,9 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const luci::add_origin(quant_op, luci::get_origin(succ)); } - // Requantize input + // Update qparam of input + // This step is skipped if input_type is float32 + if (_ctx->input_type != loco::DataType::FLOAT32) { auto quantparam = input->quantparam(); assert(quantparam); @@ -376,11 +417,13 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const assert(_ctx->input_type == loco::DataType::S16); compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); } - input->dtype(_ctx->input_type); input->quantparam()->scale[0] = scaling_factor; input->quantparam()->zerop[0] = zp; } + // Update dtype of input + input->dtype(_ctx->input_type); + auto graph_input = inputs->at(input->index()); graph_input->dtype(_ctx->input_type); } @@ -405,13 +448,26 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const if (not from->quantparam()) continue; - // Insert Quantize Op - auto quant_op = create_quantize_op(from, _ctx->output_type); - loco::replace(from).with(quant_op); - quant_op->input(from); + // Insert Dequantize Op for float32 output_type + if (_ctx->output_type == loco::DataType::FLOAT32) + { + auto dequant_op = create_dequantize(from); + loco::replace(from).with(dequant_op); + dequant_op->input(from); + } + else + { + // Insert Quantize Op for non-float32 output_type + auto quant_op = 
create_quantize_op(from, _ctx->output_type); + loco::replace(from).with(quant_op); + quant_op->input(from); - // TODO Set a proper origin (Quantize should have its own Origin) - luci::add_origin(quant_op, luci::get_origin(from)); + // TODO Set a proper origin (Quantize should have its own Origin) + luci::add_origin(quant_op, luci::get_origin(from)); + } + + // Update dtype of output + output->dtype(_ctx->output_type); auto graph_output = outputs->at(output->index()); graph_output->dtype(_ctx->output_type); @@ -594,12 +650,25 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // Set output type set_output_type(g); + // Remove redundant Quantize Op + { + logo::Phase phase; + + phase.emplace_back(std::make_unique()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); + } + // Remove min/max values for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast(node); if (auto qparam = circle_node->quantparam()) { + warn_accuracy_with_range(circle_node); qparam->min.clear(); qparam->max.clear(); } diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp index cebafd3..21b4fe1 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp +++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp @@ -1088,6 +1088,31 @@ private: luci::CircleConst *_const = nullptr; }; +class ReduceMaxTestGraph final : public SimpleTestGraph +{ +public: + void init(void) override + { + TestIOGraph::init({4, 3, 2}, {2}); + + _axis = create_const(g(), {4}, {1, 0, -3, -3}); + _reduce_max = g()->nodes()->create(); + { + _reduce_max->input(input()); + _reduce_max->reduction_indices(_axis); + _reduce_max->name("test"); + _reduce_max->keep_dims(false); + } + output()->from(_reduce_max); + + set_minmax_to_non_const(g(), -1, 1); + } + +private: + luci::CircleReduceMax *_reduce_max = 
nullptr; + luci::CircleConst *_axis = nullptr; +}; + class ResizeBilinearTestGraph final : public SimpleTestGraph { public: @@ -2345,6 +2370,34 @@ TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG) SUCCEED(); } +TEST(QuantizedModelVerifierTest, ReduceMax) +{ + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, ReduceMax_wrong_type_NEG) +{ + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, ReduceMax_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + TEST(QuantizedModelVerifierTest, ResizeBilinear) { TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise); diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp new file mode 100644 index 0000000..66cd9d7 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveRedundantDequantizePass.h" + +#include + +namespace +{ + +bool remove_redundant_dequant(luci::CircleDequantize *dequant) +{ + assert(dequant != nullptr); + + auto prev = loco::must_cast(dequant->input()); + if (prev->dtype() != loco::DataType::FLOAT32) + return false; + + replace(dequant).with(prev); + + return true; +} + +} // namespace + +namespace luci +{ +/** + * Dequantize Op does the below things on the ifm. + * 1. Element-wise update of quantized values (u8/s16) to fp32 values + * 2. Update dtype to fp32 + * If the previous node is not quantized, dequantize Op is redundant. 
+ * + * BEFORE + * + * [CircleNode (A)] + * | + * [CircleNode (B)] (fp32) + * | + * [CircleDequantize] + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode (A)] + * | + * [CircleNode (B)] (fp32) + * | + * [CircleNode] + */ +bool RemoveRedundantDequantizePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto target_node = dynamic_cast(node); + if (target_node != nullptr) + { + if (remove_redundant_dequant(target_node)) + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp new file mode 100644 index 0000000..adb2f14 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/RemoveRedundantDequantizePass.h" + +#include + +#include + +#include + +namespace +{ + +using namespace luci::test; + +class DequantizeGraphlet +{ +public: + DequantizeGraphlet() = default; + +public: + void init(loco::Graph *g) + { + _dequantize = g->nodes()->create(); + _dequantize->dtype(loco::DataType::FLOAT32); + _dequantize->name("dequantize"); + } + +protected: + luci::CircleDequantize *_dequantize = nullptr; +}; + +class RedundantDequantizeGraph : public TestIOGraph, public DequantizeGraphlet +{ +public: + RedundantDequantizeGraph() = default; + +public: + void init(void) + { + TestIOGraph::init({1}, {1}); + DequantizeGraphlet::init(g()); + + _dequantize->input(input()); + + output()->from(_dequantize); + } + + void init_u8_input(void) + { + TestIOGraph::init({1}, {1}); + DequantizeGraphlet::init(g()); + + // Use u8 input (dequantize is not redundant anymore) + input()->dtype(loco::DataType::U8); + { + auto qparam = std::make_unique(); + qparam->scale = {1}; + qparam->zerop = {1}; + input()->quantparam(std::move(qparam)); + } + + _dequantize->input(input()); + + output()->from(_dequantize); + } +}; + +} // namespace + +TEST(RemoveRedundantDequantizePass, single_redundant_dequantize) +{ + RedundantDequantizeGraph g; + luci::RemoveRedundantDequantizePass pass; + + g.init(); + + EXPECT_TRUE(pass.run(g.g())); + + int count = 0; + for (auto node : loco::active_nodes(loco::output_nodes(g.g()))) + { + if (dynamic_cast(node)) + { + count++; + } + } + + ASSERT_EQ(0, count); +} + +TEST(RemoveRedundantDequantizePass, wrong_dtype_NEG) +{ + RedundantDequantizeGraph g; + luci::RemoveRedundantDequantizePass pass; + + g.init_u8_input(); + + EXPECT_FALSE(pass.run(g.g())); +} diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp new file mode 100644 index 0000000..476ec68 --- /dev/null +++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp @@ -0,0 +1,172 @@ 
+/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" + +#include + +namespace +{ + +bool acceptable_intermediate_op(const loco::Node *node) +{ + if (not node) + return false; + + const auto opcode = loco::must_cast(node)->opcode(); + + switch (opcode) + { + case luci::CircleOpcode::ADD: + case luci::CircleOpcode::MUL: + case luci::CircleOpcode::TANH: + case luci::CircleOpcode::LOGISTIC: + break; + + default: + return false; + } + + return true; +} + +bool same_shape(const loco::Node *a, const loco::Node *b) +{ + auto a_cnode = loco::must_cast(a); + auto b_cnode = loco::must_cast(b); + + if (a_cnode->rank() != b_cnode->rank()) + return false; + + for (uint32_t i = 0; i < a_cnode->rank(); i++) + { + if (not(a_cnode->dim(i) == b_cnode->dim(i))) + return false; + } + return true; +} + +class PreReshapeFinder +{ +public: + PreReshapeFinder(const luci::CircleReshape *post_reshape) : _post_reshape(post_reshape) + { + assert(post_reshape != nullptr); // FIX_CALLER_UNLESS + } + +public: + // Return true if pre_reshapes are found + bool collect_pre_reshapes(loco::Node *node) + { + // TODO Support diamond case + if (loco::succs(node).size() != 1) + return false; + + if (auto pre_reshape = dynamic_cast(node)) + { + // Check ifm of pre-reshape and ofm of post_reshape + if (not same_shape(pre_reshape->tensor(), _post_reshape)) + return 
false; + + // Check ofm of pre-reshape and ifm of post_reshape + if (not same_shape(pre_reshape, _post_reshape->tensor())) + return false; + + _pre_reshapes.emplace_back(pre_reshape); + return true; + } + + if (not acceptable_intermediate_op(node)) + return false; + + for (uint32_t i = 0; i < node->arity(); i++) + { + if (not collect_pre_reshapes(node->arg(i))) + return false; + } + + return true; + } + +public: + std::vector pre_reshapes(void) const { return _pre_reshapes; } + +private: + const luci::CircleReshape *_post_reshape = nullptr; + std::vector _pre_reshapes; +}; + +bool remove_unnecessary_reshape_net(luci::CircleReshape *reshape) +{ + PreReshapeFinder finder(reshape); + if (not finder.collect_pre_reshapes(reshape->tensor())) + return false; + + // Remove pre_reshapes + for (auto pre_reshape : finder.pre_reshapes()) + { + loco::replace(pre_reshape).with(pre_reshape->tensor()); + } + + // Remove post_reshape + loco::replace(reshape).with(reshape->tensor()); + + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * + * [CircleNode] + * | + * [CircleReshape_1] (shape: A -> B) + * | + * [CircleNode] (ex: Add/Mul/Tanh/Logistic ..) 
+ * | + * [CircleReshape_2] (shape: B -> A) + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode] + * | \ + * | [CircleReshape_1] + * [CircleNode] + * | \ + * | [CircleReshape_2] + * [CircleNode] + **/ +bool RemoveUnnecessaryReshapeNetPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto reshape_node = dynamic_cast(node)) + { + if (remove_unnecessary_reshape_net(reshape_node)) + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp new file mode 100644 index 0000000..4ad707b --- /dev/null +++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" + +#include + +#include + +namespace +{ + +class RemoveUnnecessaryReshapeNet : public ::testing::Test +{ +public: + RemoveUnnecessaryReshapeNet() {} + + void createReshapeConst(luci::CircleReshape *target, const std::vector shape) + { + auto shape_const = g.nodes()->create(); + shape_const->dtype(loco::DataType::S32); + shape_const->size(shape.size()); + shape_const->shape_status(luci::ShapeStatus::VALID); + shape_const->rank(1); + shape_const->dim(0).set(shape.size()); + for (int32_t i = 0; i < shape.size(); i++) + { + shape_const->at(i) = static_cast(shape.at(i)); + } + shape_const->name("shape_const"); + target->shape(shape_const); + target->rank(shape.size()); + for (uint32_t i = 0; i < shape.size(); i++) + { + target->dim(i) = shape[i]; + } + target->shape_status(luci::ShapeStatus::VALID); + } + + void buildGraph(const std::initializer_list base_shape, + const std::initializer_list first_shape, + const std::initializer_list second_shape) + { + // Input Create. + input = g.nodes()->create(); + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + input->shape_status(luci::ShapeStatus::VALID); + input->shape(base_shape); + input->name("input"); + + // Create first reshape. + first_reshape = g.nodes()->create(); + first_reshape->tensor(input); + first_reshape->name("Reshape"); + createReshapeConst(first_reshape, first_shape); + + // Create logistic. + logistic = g.nodes()->create(); + logistic->x(first_reshape); + logistic->name("logistic"); + logistic->shape(first_shape); + logistic->shape_status(luci::ShapeStatus::VALID); + + // Create second reshape. + second_reshape = g.nodes()->create(); + second_reshape->tensor(logistic); + second_reshape->name("second_reshape"); + createReshapeConst(second_reshape, second_shape); + + // Output Connect. 
+ output = g.nodes()->create(); + output->from(second_reshape); + output->name("output"); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleReshape *first_reshape = nullptr; + luci::CircleLogistic *logistic = nullptr; + luci::CircleReshape *second_reshape = nullptr; + luci::CircleOutput *output = nullptr; +}; + +} // namespace + +TEST_F(RemoveUnnecessaryReshapeNet, simple_case) +{ + buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 1, 32}); + luci::RemoveUnnecessaryReshapeNetPass pass; + + ASSERT_TRUE(pass.run(&g)); + + int count = 0; + for (auto node : loco::active_nodes(loco::output_nodes(&g))) + { + if (auto reshape = dynamic_cast(node)) + count++; + } + ASSERT_EQ(0, count); +} + +TEST_F(RemoveUnnecessaryReshapeNet, shape_mismatch_NEG) +{ + buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 2, 16}); + luci::RemoveUnnecessaryReshapeNetPass pass; + ASSERT_FALSE(pass.run(&g)); +} diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp new file mode 100644 index 0000000..741b709 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace +{ + +// TODO move to global helper list if needed +/** + * @brief Create a node with `inp` as input from fused activation function `act` + */ +luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act) +{ + switch (act) + { + case luci::FusedActFunc::NONE: + return inp; + case luci::FusedActFunc::RELU: + { + auto n = inp->graph()->nodes()->create(); + n->features(inp); + return n; + } + case luci::FusedActFunc::RELU6: + { + auto n = inp->graph()->nodes()->create(); + n->features(inp); + return n; + } + case luci::FusedActFunc::RELU_N1_TO_1: + { + auto n = inp->graph()->nodes()->create(); + n->features(inp); + return n; + } + case luci::FusedActFunc::TANH: + { + auto n = inp->graph()->nodes()->create(); + n->x(inp); + return n; + } + case luci::FusedActFunc::SIGN_BIT: + { + throw std::invalid_argument("no matching node to create from fused activation"); + } + default: + throw std::invalid_argument("invalid fused activation"); + } +} + +/** + * Replace Fully Connected with Batched MatMul + * + * BEFORE + * + * [Node1] [Node2] + * | | + * [transpose]? [transpose]? + * \ / + * [FullyConnected] + * + * AFTER + * + * [Node1] [Node2] + * \ / + * [BatchMatMul] [BiasValue]? + * \ / + * [Add]? + * | + * [Activation]? + * + * Nodes with "?" denote optional elements + */ +bool replace_fc_with_matmul(luci::CircleFullyConnected *fc) +{ + luci::CircleNode *x = nullptr; + luci::CircleNode *y = nullptr; + luci::CircleNode *b = nullptr; + luci::CircleTranspose *ty = nullptr; + luci::CircleTranspose *tx = nullptr; + bool adj_x = false; + bool adj_y = true; + + if (dynamic_cast(fc->weights())) + return false; // NonConst + + if ((ty = dynamic_cast(fc->weights()))) // is y a transpose? 
+ { + adj_y = false; + if (dynamic_cast(ty->a())) + return false; + else + y = loco::must_cast(ty->a()); + } + else + { // y is not transpose and not const + y = loco::must_cast(fc->weights()); + } + if ((tx = dynamic_cast(fc->input()))) + { + adj_x = true; + x = loco::must_cast(tx->a()); + } + else + { + x = loco::must_cast(fc->input()); + } + + b = loco::must_cast(fc->bias()); + + if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 || + b->dtype() != loco::DataType::FLOAT32) + return false; + + auto name = fc->name(); + assert(name.length() > 0); + + auto matmul = fc->graph()->nodes()->create(); + matmul->x(x); + matmul->y(y); + matmul->adj_x(adj_x); + matmul->adj_y(adj_y); + matmul->name(name); + matmul->dtype(fc->dtype()); + + luci::add_origin(matmul, luci::get_origin(fc)); + + auto all_zero = [](const luci::CircleConst *c) { + bool ac = true; + for (uint32_t i = 0; i < c->size() && ac; i++) + { + ac &= c->at(i) == 0.0f; + } + return ac; + }; + + auto bc = dynamic_cast(b); + if ((nullptr != bc) && !all_zero(bc)) + { + auto bias_add = fc->graph()->nodes()->create(); + bias_add->x(matmul); + bias_add->y(b); + bias_add->name(fc->name() + "/bias_add"); + bias_add->dtype(fc->dtype()); + add_origin(bias_add, get_origin(fc)); + bias_add->fusedActivationFunction(fc->fusedActivationFunction()); + loco::replace(fc).with(bias_add); + } + else + { + auto n = fromActivation(matmul, fc->fusedActivationFunction()); + add_origin(n, luci::get_origin(fc)); + n->name(fc->name() + "fusedActivation"); + n->dtype(fc->dtype()); + loco::replace(fc).with(n); + } + + return true; +} +} // namespace + +namespace luci +{ + +bool ReplaceNonConstFCWithBatchMatMulPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto fc = dynamic_cast(node)) + { + if (replace_fc_with_matmul(fc)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git 
a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp new file mode 100644 index 0000000..7606a61 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h" + +#include +#include + +#include + +namespace +{ + +using namespace luci::test; + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector &shape, + const std::vector &values) +{ + auto node = g->nodes()->create(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size
(size); \ + for (uint32_t i = 0; i < values.size(); ++i) \ + node->at
(i) = values[i]; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph for test + * + * BEFORE + * + * [IFM1] [IFM2] [BIAS] + * \ | / + * [FC] + * | + * [Res] + * + * AFTER + * [IFM1] [IFM2] + * \ | + * [BatchMatMul] [BIAS] + * \ / + * [Add] + * | + * [Res] + * + */ +struct FCGraphlet +{ +public: + FCGraphlet() = default; + virtual ~FCGraphlet() = default; + + void init(loco::Graph *g, const ShapeU32 r_shape, const float bv) + { + _tr_y = g->nodes()->create(); + _tr_y->a(_y); + std::vector tr_val = {1, 0}; + _tr_y->perm(create_const_node(g, loco::DataType::S32, {2}, tr_val)); + + _fc = g->nodes()->create(); + _fc->input(_x); + _fc->weights(_tr_y); + _fc->fusedActivationFunction(luci::FusedActFunc::NONE); + _fc->dtype(loco::DataType::FLOAT32); + _fc->shape(r_shape); + auto l = _fc->dim(_fc->rank() - 1).value(); + std::vector bias_val(l, bv); + _fc->bias(create_const_node(g, loco::DataType::FLOAT32, {l}, bias_val)); + _fc->name("fc"); + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleTranspose *_tr_y = nullptr; + luci::CircleInput *_x = nullptr; + luci::CircleInput *_y = nullptr; +}; + +struct FCGraph : public TestIsGraphlet<2>, public TestOGraphlet, public FCGraphlet +{ + FCGraph() = default; + virtual ~FCGraph() = default; + void init(const ShapeU32 x_shape, const ShapeU32 y_shape, const ShapeU32 r_shape, const float bv) + { + TestIsGraphlet<2>::init(g(), {x_shape, y_shape}); + TestOGraphlet::init(g(), r_shape); + _x = input(0); + _y = input(1); + FCGraphlet::init(g(), r_shape, 
bv); + output()->from(_fc); + } +}; + +class ReplaceNonConstFCWithBatchMatMulPassTest : public ::testing::Test +{ +public: + FCGraph g; + luci::ReplaceNonConstFCWithBatchMatMulPass pass; +}; + +} // namespace + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test) +{ + g.init({2, 3}, {2, 3}, {2, 2}, 0.0f); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto mm = dynamic_cast(g.output()->from()); + EXPECT_NE(nullptr, mm); +} + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test) +{ + g.init({2, 3}, {2, 3}, {2, 2}, 1.0f); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto mm = dynamic_cast(g.output()->from()); + EXPECT_NE(nullptr, mm); +} + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, wrong_op_NEG) +{ + loco::Graph g; + + auto inp = g.nodes()->create(); + auto relu = g.nodes()->create(); + relu->features(inp); + + luci::ReplaceNonConstFCWithBatchMatMulPass pass; + auto changed = pass.run(&g); + + EXPECT_EQ(false, changed); +} diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp new file mode 100644 index 0000000..a650658 --- /dev/null +++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/ResolveCustomOpSplitVPass.h" + +#include +#include +#include + +namespace +{ + +// Input node is const S64 +// Return s32 version of node +// Return nullptr if s64 value is out of range of s32 +luci::CircleConst *s64_to_s32(luci::CircleConst *node) +{ + assert(node); + assert(node->dtype() == loco::DataType::S64); + + auto cloned = luci::clone(node); + luci::add_origin(cloned, luci::get_origin(node)); + + const auto num_elems = node->size(); + + cloned->dtype(loco::DataType::S32); + cloned->size(num_elems); + + for (uint32_t i = 0; i < num_elems; i++) + { + int64_t val = node->at(i); + if (val < std::numeric_limits::min() or val > std::numeric_limits::max()) + return nullptr; + + cloned->at(i) = static_cast(val); + } + + return cloned; +} + +/** BEFORE + * + * [CircleNode] + * \ + * \ [size_splits] [split_dim] + * \ | / + * [CircleCustom(SplitV)] + * | + * [CircleCustomOut] + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode] + * | \ + * | \ [size_splits] [split_dim] + * | \ | / + * | \ | / + * | \ | / + * [CircleCustom(SplitV)] [CircleSplitV] + * | | + * [CircleCustomOut] [CircleSplitVOut] + * | + * [CircleNode] + */ +bool resolve_splitv(luci::CircleCustom *node) +{ + const std::string custom_code = node->custom_code(); + const std::vector custom_options = node->custom_options(); + + if (custom_code != "SplitV") + return false; + + if (node->numInputs() != 3) + return false; + + auto size_splits = dynamic_cast(node->inputs(1)); + if (not size_splits) + return false; + + // Convert size_splits to S32, because luci-interpreter does not support + // S64 size_splits yet + // TODO Support S64 size_splits + if (size_splits->dtype() == loco::DataType::S64) + { + size_splits = s64_to_s32(size_splits); + if (not size_splits) + return false; + } + if (size_splits->dtype() != loco::DataType::S32) + return false; + + auto split_dim = dynamic_cast(node->inputs(2)); + if (not split_dim) + return false; + + if (split_dim->dtype() == 
loco::DataType::S64) + { + split_dim = s64_to_s32(split_dim); + if (not split_dim) + return false; + } + if (split_dim->dtype() != loco::DataType::S32) + return false; + + if (size_splits->rank() != 1) + return false; + + const auto num_split = size_splits->dim(0).value(); + + auto split_v = node->graph()->nodes()->create(); + split_v->input(node->inputs(0)); + split_v->size_splits(size_splits); + split_v->split_dim(split_dim); + split_v->num_split(num_split); + split_v->name(node->name()); + luci::add_origin(split_v, luci::get_origin(node)); + + int32_t i = 0; + const auto succs = loco::succs(node); + for (auto succ : succs) + { + auto custom_out = loco::must_cast(succ); // FIX_CALLER_UNLESS + + auto split_v_out = node->graph()->nodes()->create(); + split_v_out->input(split_v); + split_v_out->name(node->name() + "_out_" + std::to_string(i)); + split_v_out->index(i++); + luci::add_origin(split_v_out, luci::get_origin(node)); + loco::replace(custom_out).with(split_v_out); + } + + return true; +} + +} // namespace + +namespace luci +{ + +bool ResolveCustomOpSplitVPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto cop = dynamic_cast(node); + if (not cop) + continue; + + if (resolve_splitv(cop)) + changed = true; + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp new file mode 100644 index 0000000..e7738aa --- /dev/null +++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ResolveCustomOpSplitVPass.h" + +#include + +#include +#include + +using namespace luci::test; + +namespace +{ + +/** + * graph having Custom operator SplitV + * + * [Input] [Const] [Const] + * \ | / + * [Custom(SplitV)] + * / | \ + * [CustomOut] [CustomOut] [CustomOut] + * | | | + * [Output] [Output] [Output] + */ +class SplitVGraphlet +{ +public: + SplitVGraphlet() = default; + +public: + void init(loco::Graph *g) + { + // CircleCustom(SplitV) + _splitv = g->nodes()->create(3, 3); + _splitv->custom_code("SplitV"); + _splitv->shape({1, 2, 2, 192}); + _splitv->dtype(loco::DataType::FLOAT32); + _splitv->name("splitv"); + + // CircleConst + auto size_splits = g->nodes()->create(); + size_splits->dtype(loco::DataType::S64); + size_splits->shape({3}); + size_splits->size(3); + size_splits->at(0) = 32; + size_splits->at(1) = 32; + size_splits->at(2) = 128; + + // CircleConst + auto split_dim = g->nodes()->create(); + split_dim->dtype(loco::DataType::S32); + split_dim->rank(0); + split_dim->size(1); + split_dim->scalar() = 3; + + _splitv->inputs(1, size_splits); + _splitv->inputs(2, split_dim); + + // CircleCustomOut + _splitv_out1 = g->nodes()->create(); + _splitv_out1->shape({1, 2, 2, 32}); + _splitv_out1->dtype(loco::DataType::FLOAT32); + _splitv_out1->index(0); + _splitv_out1->input(_splitv); + + // CircleCustomOut + _splitv_out2 = g->nodes()->create(); + _splitv_out2->shape({1, 2, 2, 32}); + _splitv_out2->dtype(loco::DataType::FLOAT32); + _splitv_out2->index(1); + _splitv_out2->input(_splitv); + + // CircleCustomOut + 
_splitv_out3 = g->nodes()->create(); + _splitv_out3->shape({1, 2, 2, 128}); + _splitv_out3->dtype(loco::DataType::FLOAT32); + _splitv_out3->index(2); + _splitv_out3->input(_splitv); + } + +public: + luci::CircleCustom *splitv() { return _splitv; } + +protected: + luci::CircleCustom *_splitv = nullptr; + luci::CircleCustomOut *_splitv_out1 = nullptr; + luci::CircleCustomOut *_splitv_out2 = nullptr; + luci::CircleCustomOut *_splitv_out3 = nullptr; +}; + +class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet +{ +public: + SplitVGraph() = default; + + void init(void) + { + TestIGraphlet::init(g(), {1, 2, 2, 192}); + TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}}); + SplitVGraphlet::init(g()); + + // connect graph + _splitv->inputs(0, input()); + + output(0)->from(_splitv_out1); + output(1)->from(_splitv_out2); + output(2)->from(_splitv_out3); + } +}; + +class SplitVGraphTest : public ::testing::Test +{ +public: + SplitVGraph g; + luci::ResolveCustomOpSplitVPass pass; +}; + +} // namespace + +TEST_F(SplitVGraphTest, simple_test) +{ + g.init(); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto svo_1 = dynamic_cast(g.output(0)->from()); + EXPECT_NE(nullptr, svo_1); + auto svo_2 = dynamic_cast(g.output(1)->from()); + EXPECT_NE(nullptr, svo_2); + auto svo_3 = dynamic_cast(g.output(2)->from()); + EXPECT_NE(nullptr, svo_3); + + auto sv = dynamic_cast(svo_1->input()); + EXPECT_NE(nullptr, sv); + sv = dynamic_cast(svo_2->input()); + EXPECT_NE(nullptr, sv); + sv = dynamic_cast(svo_3->input()); + EXPECT_NE(nullptr, sv); + + auto size_splits = loco::must_cast(sv->size_splits()); + EXPECT_EQ(loco::DataType::S32, size_splits->dtype()); + EXPECT_EQ(32, size_splits->at(0)); + EXPECT_EQ(32, size_splits->at(1)); + EXPECT_EQ(128, size_splits->at(2)); + + auto split_dim = loco::must_cast(sv->split_dim()); + EXPECT_EQ(loco::DataType::S32, split_dim->dtype()); + EXPECT_EQ(3, split_dim->scalar()); +} + 
+TEST_F(SplitVGraphTest, wrong_op_NEG) +{ + g.init(); + + g.splitv()->custom_code("AddV2"); + + auto ret = pass.run(g.g()); + EXPECT_EQ(false, ret); +} diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h index 442183c..408e6b8 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h @@ -197,6 +197,13 @@ private: return true; } + bool visit(const luci::CircleReduceMax *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)); + RETURN_FALSE_UNLESS(is_lwq(node->input())); + return true; + } + bool visit(const luci::CircleRelu *node) { RETURN_FALSE_UNLESS(is_lwq(node)); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp index 4e1c062..cf86aca 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp +++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp @@ -302,6 +302,15 @@ bool VerifyQuantizedNodeTypeBase::visit(const luci::CirclePow *nod } template +bool VerifyQuantizedNodeTypeBase::visit(const luci::CircleReduceMax *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32)) + return true; +} + +template bool VerifyQuantizedNodeTypeBase::visit(const luci::CircleRelu *node) { return group_has_type(node, Qtype); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h index ff1acbd..789d3c7 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeType.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h @@ -104,6 +104,7 @@ private: bool visit(const luci::CirclePadV2 *node); bool visit(const luci::CirclePRelu *node); bool visit(const luci::CirclePow *node); + bool visit(const luci::CircleReduceMax *node); bool visit(const luci::CircleRelu *node); bool visit(const 
luci::CircleReshape *node); bool visit(const luci::CircleResizeBilinear *node); diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp new file mode 100644 index 0000000..72b7d60 --- /dev/null +++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +#include "SparsityFormatConverter.h" + +#include + +#include + +namespace sparsity +{ + +namespace +{ + +uint64_t GetFlattenedIndex(const std::vector &indices, const std::vector &shape) +{ + uint64_t index = 0; + int sub_elements = 1; + for (int i = shape.size() - 1; i >= 0; i--) + { + index += indices[i] * sub_elements; + sub_elements *= shape[i]; + } + return index; +} + +std::vector TfLiteIntArrayToVector(const TfLiteIntArray *int_array) +{ + std::vector values; + if (!int_array) + { + return values; + } + + values.resize(int_array->size); + for (int i = 0; i < int_array->size; i++) + { + values[i] = int_array->data[i]; + } + + return values; +} + +} // namespace + +template +FormatConverter::FormatConverter(const std::vector &shape, const TfLiteSparsity &sparsity) +{ + auto traversal_order = TfLiteIntArrayToVector(sparsity.traversal_order); + auto block_map = TfLiteIntArrayToVector(sparsity.block_map); + + std::vector format(sparsity.dim_metadata_size); + std::vector dense_size(sparsity.dim_metadata_size); + std::vector> segments(sparsity.dim_metadata_size); + std::vector> indices(sparsity.dim_metadata_size); + for (int i = 0; i < sparsity.dim_metadata_size; i++) + { + format[i] = sparsity.dim_metadata[i].format; + dense_size[i] = sparsity.dim_metadata[i].dense_size; + segments[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_segments); + indices[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_indices); + } + + InitSparseToDenseConverter(shape, std::move(traversal_order), std::move(format), + std::move(dense_size), std::move(segments), std::move(indices), + std::move(block_map)); +} + +template +void FormatConverter::InitSparseToDenseConverter( + 
std::vector shape, std::vector traversal_order, std::vector format, + std::vector dense_size, std::vector> segments, + std::vector> indices, std::vector block_map) +{ + dense_shape_ = std::move(shape); + traversal_order_ = std::move(traversal_order); + block_map_ = std::move(block_map); + format_ = std::move(format); + + dense_size_ = 1; + for (size_t i = 0; i < dense_shape_.size(); i++) + { + dense_size_ *= dense_shape_[i]; + } + + dim_metadata_.resize(2 * format_.size()); + for (size_t i = 0; i < format_.size(); i++) + { + if (format_[i] == kTfLiteDimDense) + { + dim_metadata_[2 * i] = {dense_size[i]}; + } + else + { + dim_metadata_[2 * i] = std::move(segments[i]); + dim_metadata_[2 * i + 1] = std::move(indices[i]); + } + } + + int original_rank = dense_shape_.size(); + int block_dim = 0; + + blocked_shape_.resize(original_rank); + block_size_.resize(block_map_.size()); + for (int i = 0; i < original_rank; i++) + { + if (block_dim < (int)block_map_.size() && block_map_[block_dim] == i) + { + if (original_rank + block_dim < (int)traversal_order_.size()) + { + int orig_dim = traversal_order_[original_rank + block_dim]; + block_size_[block_dim] = dense_size[orig_dim]; + blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim]; + block_dim++; + } + } + else + { + blocked_shape_[i] = dense_shape_[i]; + } + } +} + +template +void FormatConverter::Populate(const T *src_data, std::vector indices, int level, + int prev_idx, int *src_data_ptr, T *dest_data) +{ + if (static_cast(level) == indices.size()) + { + int orig_rank = dense_shape_.size(); + std::vector orig_idx; + orig_idx.resize(orig_rank); + int i = 0; + for (; static_cast(i) < orig_idx.size(); i++) + { + int orig_dim = traversal_order_[i]; + orig_idx[orig_dim] = indices[i]; + } + + for (; static_cast(i) < indices.size(); i++) + { + const int block_idx = traversal_order_[i] - orig_rank; + const int orig_dim = block_map_[block_idx]; + orig_idx[orig_dim] = orig_idx[orig_dim] * block_size_[block_idx] + indices[i]; 
+ } + + dest_data[GetFlattenedIndex(orig_idx, dense_shape_)] = src_data[*src_data_ptr]; + + *src_data_ptr = *src_data_ptr + 1; + return; + } + + const int metadata_idx = 2 * level; + const int shape_of_level = dim_metadata_[metadata_idx][0]; + if (format_[level] == kTfLiteDimDense) + { + for (int i = 0; i < shape_of_level; i++) + { + indices[level] = i; + Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr, + dest_data); + } + } + else if (static_cast(prev_idx + 1) < dim_metadata_[metadata_idx].size()) + { + const auto &array_segments = dim_metadata_[metadata_idx]; + const auto &array_indices = dim_metadata_[metadata_idx + 1]; + for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++) + { + if (static_cast(i) < array_indices.size() && + static_cast(level) < indices.size()) + { + indices[level] = array_indices[i]; + Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data); + } + } + } +} + +template bool FormatConverter::SparseToDense(const T *src_data) +{ + data_.resize(dense_size_); + std::fill(data_.begin(), data_.end(), T(0)); + + int total_rank = traversal_order_.size(); + int src_data_ptr = 0; + std::vector indices(total_rank); + Populate(src_data, indices, 0, 0, &src_data_ptr, data_.data()); + + return true; +} + +template class FormatConverter; +template class FormatConverter; + +} // namespace sparsity + +#include + +namespace luci +{ + +sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt) +{ + switch (dt) + { + case luci::DimensionType::DENSE: + return sparsity::TfLiteDimensionType::kTfLiteDimDense; + case luci::DimensionType::SPARSE_CSR: + return sparsity::TfLiteDimensionType::kTfLiteDimSparseCSR; + } + return sparsity::TfLiteDimensionType::kTfLiteDimDense; +} + +sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data) +{ + auto type = data.type(); + switch (type) + { + case luci::SparseIndexVectorType::NONE: + { + std::vector empty; + return 
makeTfLiteArray(empty); + } + case luci::SparseIndexVectorType::I32: + return makeTfLiteArray(*data.as_int32_vector()); + case luci::SparseIndexVectorType::U16: + return makeTfLiteArray(*data.as_uint16_vector()); + case luci::SparseIndexVectorType::U8: + return makeTfLiteArray(*data.as_uint8_vector()); + default: + INTERNAL_EXN_V("unsupported SparseIndexVectorType", oops::to_uint32(type)); + } +} + +sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp) +{ + sparsity::TfLiteSparsity tflsp; + tflsp.traversal_order = makeTfLiteArray(sp->traversal_order); + tflsp.block_map = makeTfLiteArray(sp->block_map); + tflsp.dim_metadata = makeTfLiteDimensionMetadata(sp->dim_metadata); + tflsp.dim_metadata_size = sp->dim_metadata.size(); + return tflsp; +} + +template sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector &data) +{ + size_t cn = data.size(); + size_t sz = 1 + data.size(); + sparsity::TfLiteIntArray *sp = (sparsity::TfLiteIntArray *)(new int[sz]); + sp->size = cn; + for (size_t i = 0; i < cn; ++i) + { + sp->data[i] = data[i]; + } + return sp; +} + +sparsity::TfLiteDimensionMetadata * +makeTfLiteDimensionMetadata(const std::vector &data) +{ + size_t cn = data.size(); + sparsity::TfLiteDimensionMetadata *tfldm = new sparsity::TfLiteDimensionMetadata[cn]; + + for (size_t i = 0; i < cn; ++i) + { + tfldm[i].format = to_tflite_sparsity(data[i].format()); + tfldm[i].dense_size = data[i].dense_size(); + tfldm[i].array_segments = to_tflite_sparsity(data[i].array_segments()); + tfldm[i].array_indices = to_tflite_sparsity(data[i].array_indices()); + } + + return tfldm; +} + +void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp) +{ + assert(tflsp.traversal_order); + assert(tflsp.block_map); + delete[] tflsp.traversal_order; + delete[] tflsp.block_map; + + for (int i = 0; i < tflsp.dim_metadata_size; ++i) + { + assert(tflsp.dim_metadata[i].array_segments); + assert(tflsp.dim_metadata[i].array_indices); + delete[] 
tflsp.dim_metadata[i].array_segments; + delete[] tflsp.dim_metadata[i].array_indices; + } +} + +} // namespace luci diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.h b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h new file mode 100644 index 0000000..fcd9bbc --- /dev/null +++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ +#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ + +#include +#include + +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +// Storage format of each dimension in a sparse tensor. +typedef enum TfLiteDimensionType +{ + kTfLiteDimDense = 0, + kTfLiteDimSparseCSR, +} TfLiteDimensionType; + +// Fixed size list of integers. Used for dimensions and inputs/outputs tensor +// indices +typedef struct TfLiteIntArray +{ + int size; + int data[]; +} TfLiteIntArray; + +// Metadata to encode each dimension in a sparse tensor. 
+typedef struct TfLiteDimensionMetadata +{ + TfLiteDimensionType format; + int dense_size; + TfLiteIntArray *array_segments; + TfLiteIntArray *array_indices; +} TfLiteDimensionMetadata; + +// Parameters used to encode a sparse tensor. For detailed explanation of each +// field please refer to lite/schema/schema.fbs. +typedef struct TfLiteSparsity +{ + TfLiteIntArray *traversal_order; + TfLiteIntArray *block_map; + TfLiteDimensionMetadata *dim_metadata; + int dim_metadata_size; +} TfLiteSparsity; + +// A converter that keeps an internal representation of sparse tensor parameters +// and converts tensors between dense and sparse formats. +template class FormatConverter +{ +public: + /* Creates a sparse to dense converter. + * @param shape Shape of the target dense tensor. + * @param sparsity Sparsity parameter of the sparse TfLiteTensor. + */ + FormatConverter(const std::vector &shape, const TfLiteSparsity &sparsity); + + const std::vector &GetData() { return data_; } + const std::vector> &GetDimMetadata() { return dim_metadata_; } + + bool SparseToDense(const T *src_data); + +private: + // Helper function for initializing this converter for sparse to dense + // conversion. 
+ void InitSparseToDenseConverter(std::vector shape, std::vector traversal_order, + std::vector format, + std::vector dense_size, + std::vector> segments, + std::vector> indices, + std::vector block_map); + + void Populate(const T *src_data, std::vector indices, int level, int prev_idx, + int *src_data_ptr, T *dest_data); + +private: + std::vector dense_shape_; + std::vector blocked_shape_; + size_t dense_size_; + std::vector traversal_order_; + std::vector format_; + std::vector block_size_; + std::vector block_map_; + std::vector> dim_metadata_; + std::vector data_; +}; + +extern template class FormatConverter; +extern template class FormatConverter; + +} // namespace sparsity + +#include + +namespace luci +{ + +sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt); +sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data); +sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp); + +template sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector &data); +sparsity::TfLiteDimensionMetadata * +makeTfLiteDimensionMetadata(const std::vector &data); + +void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp); + +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index e896188..0a5e6a5 100644 --- a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -10,4 +10,5 @@ require("oops") require("hermes") require("hermes-std") require("tflchef") +require("circlechef") require("tflite2circle") diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h index 99e4561..95f06db 100644 --- a/compiler/luci/service/src/CircleCloneNode.h +++ b/compiler/luci/service/src/CircleCloneNode.h @@ -72,6 +72,7 @@ public: CloneNodeLet(loco::Graph *graph) : _graph(graph){}; public: + luci::CircleNode *visit(const luci::CircleDensify *) final; luci::CircleNode 
*visit(const luci::CircleDepthToSpace *) final; luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final; luci::CircleNode *visit(const luci::CircleDequantize *) final; diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index 9d156f3..a368fae 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -204,6 +204,7 @@ template loco::NodeShape broadcast_xy(const CIRCLENODE *node) return loco::NodeShape{inputs_shape}; \ } +DECLARE_USE_SINGLE(input); DECLARE_USE_SINGLE(inputs); DECLARE_USE_SINGLE(x); DECLARE_USE_SINGLE(logits); @@ -258,10 +259,10 @@ loco::NodeShape infer_add_n(const luci::CircleAddN *node) return loco::NodeShape{shape}; } -loco::NodeShape infer_arg_max(const luci::CircleArgMax *node) +template loco::NodeShape infer_arg_maxmin(const CIRCLENODE *node) { - auto input_shape = luci::shape_get(node->input()).as(); - auto dimension_shape = luci::shape_get(node->dimension()).as(); + auto input_shape = luci::shape_get(node->input()).template as(); + auto dimension_shape = luci::shape_get(node->dimension()).template as(); int64_t select_axis = 0; { @@ -271,55 +272,19 @@ loco::NodeShape infer_arg_max(const luci::CircleArgMax *node) // Support S32 for now. 
auto const_shape_node = loco::must_cast(node->dimension()); LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMax"); + "Only support int32 CircleConst for CircleArgMax/CircleArgMin"); if (const_shape_node->rank() > 1) INTERNAL_EXN_V("Only support rank 0/1 CircleConst", oops::to_uint32(const_shape_node->rank())); - select_axis = const_shape_node->scalar(); - } - assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks - - // NOTE select_axis is removed - loco::TensorShape shape_output; - uint32_t rank = input_shape.rank(); - uint32_t shrink = static_cast(select_axis); - assert(rank > 0); - shape_output.rank(rank - 1); - for (uint32_t r = 0, d = 0; r < rank; ++r) - { - if (r == shrink) - continue; - shape_output.dim(d++) = input_shape.dim(r); + select_axis = const_shape_node->template scalar(); } - return loco::NodeShape{shape_output}; -} - -loco::NodeShape infer_arg_min(const luci::CircleArgMin *node) -{ - auto input_shape = luci::shape_get(node->input()).as(); - auto dimension_shape = luci::shape_get(node->dimension()).as(); - - int64_t select_axis = 0; - { - LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); - - // Only support node's shape() is CircleConst with S32/S64 - // Support S32 for now. 
- auto const_shape_node = loco::must_cast(node->dimension()); - LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMin"); - - if (const_shape_node->rank() > 1) - INTERNAL_EXN_V("Only support rank 0/1 CircleConst", - oops::to_uint32(const_shape_node->rank())); - select_axis = const_shape_node->scalar(); - } assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks + + if (select_axis < 0) + select_axis += input_shape.rank(); // NOTE select_axis is removed loco::TensorShape shape_output; @@ -1180,45 +1145,17 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node) return loco::NodeShape{output_shape}; } -loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node) +template loco::NodeShape infer_resize_type(const CIRCLENODE *node) { - auto input_shape = luci::shape_get(node->input()).as(); - - if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeBilinear input to have rank 4"); - - auto *const_node = loco::must_cast(node->size()); - - if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size"); - - if (const_node->rank() != 1) - INTERNAL_EXN("Expected size tensor of rank 1"); - - if (const_node->dim(0).value() != 2) - INTERNAL_EXN("Expected size tensor with shape [2]"); - - loco::TensorShape output_shape; - output_shape.rank(4); - output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at(0); - output_shape.dim(2) = const_node->at(1); - output_shape.dim(3) = input_shape.dim(3); - - return loco::NodeShape{output_shape}; -} - -loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node) -{ - auto input_shape = luci::shape_get(node->input()).as(); + auto input_shape = luci::shape_get(node->input()).template as(); if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4"); + 
INTERNAL_EXN("Expected input to have rank 4"); auto *const_node = loco::must_cast(node->size()); if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size"); + INTERNAL_EXN("Only S32 datatype is supported for size"); if (const_node->rank() != 1) INTERNAL_EXN("Expected size tensor of rank 1"); @@ -1229,8 +1166,8 @@ loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNei loco::TensorShape output_shape; output_shape.rank(4); output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at(0); - output_shape.dim(2) = const_node->at(1); + output_shape.dim(1) = const_node->template at(0); + output_shape.dim(2) = const_node->template at(1); output_shape.dim(3) = input_shape.dim(3); return loco::NodeShape{output_shape}; @@ -2080,9 +2017,9 @@ public: loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); } - loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); } + loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_maxmin(node); } - loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); } + loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_maxmin(node); } loco::NodeShape visit(const luci::CircleAveragePool2D *node) final { @@ -2119,6 +2056,8 @@ public: loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); } + loco::NodeShape visit(const luci::CircleDensify *node) final { return use_input(node); } + loco::NodeShape visit(const luci::CircleDepthToSpace *node) final { return infer_depth_to_space(node); @@ -2348,12 +2287,12 @@ public: loco::NodeShape visit(const luci::CircleResizeBilinear *node) final { - return infer_resize_bilinear(node); + return infer_resize_type(node); } loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final { - return 
infer_resize_nearest_neighbor(node); + return infer_resize_type(node); } loco::NodeShape visit(const luci::CircleReverseSequence *node) final diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp index 438c4a3..7616390 100644 --- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp @@ -102,6 +102,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitordtype(); } + loco::DataType visit(const luci::CircleDensify *node) final + { + return luci::dtype_get(node->input()); + } + loco::DataType visit(const luci::CircleDepthToSpace *node) final { return luci::dtype_get(node->input()); diff --git a/compiler/luci/service/src/Nodes/CircleDensify.cpp b/compiler/luci/service/src/Nodes/CircleDensify.cpp new file mode 100644 index 0000000..a0d15b6 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleDensify.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleCloneNode.h" + +namespace luci +{ + +luci::CircleNode *CloneNodeLet::visit(const luci::CircleDensify *) +{ + return _graph->nodes()->create(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleDensify.test.cpp b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 0000000..d0f32c1 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Service/CircleNodeClone.h" + +#include + +TEST(CloneNodeTest, clone_Densify) +{ + auto g = loco::make_graph(); + auto node_densify = g->nodes()->create(); + + auto gc = loco::make_graph(); + auto cloned = luci::clone_node(node_densify, gc.get()); + ASSERT_NE(nullptr, cloned); + ASSERT_EQ(gc.get(), cloned->graph()); + + auto cloned_densify = dynamic_cast(cloned); + ASSERT_NE(nullptr, cloned_densify); +} diff --git a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp index c5864f9..77135cc 100644 --- a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp +++ b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp @@ -24,16 +24,22 @@ #include #include +#include #include #include #include +// code referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/strided_slice.cc +// tensorflow/lite/kernels/internal/strided_slice_logic.h + namespace { -// This Op only supports 1-4D cases and since we use the reference 4D +// This Op only supports 1-5D cases and since we use the reference 4D // implementation, the 1-3D tensors are mapped to 4D. 
-const int kMaxDim = 4; +const int kMaxDim = 5; const loco::DataType S32 = loco::DataType::S32; @@ -42,18 +48,47 @@ using int16 = int16_t; struct StridedSliceParams { - int8 start_indices_count; + int8 start_indices_count = 0; int16 start_indices[kMaxDim]; - int8 stop_indices_count; + int8 stop_indices_count = 0; int16 stop_indices[kMaxDim]; - int8 strides_count; + int8 strides_count = 0; int16 strides[kMaxDim]; - int16 begin_mask; - int16 ellipsis_mask; - int16 end_mask; - int16 new_axis_mask; - int16 shrink_axis_mask; + int16 begin_mask = 0; + int16 ellipsis_mask = 0; + int16 end_mask = 0; + int16 new_axis_mask = 0; + int16 shrink_axis_mask = 0; +}; + +struct StridedSliceContext +{ + StridedSliceContext(const luci::CircleStridedSlice *node) + { + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + input = loco::must_cast(node->input()); + begin = loco::must_cast(node->begin()); + end = loco::must_cast(node->end()); + strides = loco::must_cast(node->strides()); + + loco::TensorShape input_shape = luci::shape_get(input).as(); + input_dims = input_shape.rank(); + } + StridedSliceParams params; + luci::CircleNode *input = nullptr; + luci::CircleConst *begin = nullptr; + luci::CircleConst *end = nullptr; + luci::CircleConst *strides = nullptr; + + // Equivalent input shape after adding axis according to new_axis_mask. + loco::TensorShape effective_input_shape; + uint32_t input_dims = 0; }; // Use until std::clamp() is available from C++17. @@ -70,8 +105,8 @@ inline int Clamp(const int32_t v, const int32_t lo, const int32_t hi) // Return the index for the first element along that axis. This index will be a // positive integer between [0, axis_size - 1] that can be used to index // directly into the data. 
-inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, - uint32_t axis) +inline int32_t StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, + uint32_t axis) { const auto begin_mask = params.begin_mask; const auto *start_indices = params.start_indices; @@ -108,7 +143,16 @@ inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShap } // Clamping - start = Clamp(start, 0, axis_size - 1); + if (strides[axis] > 0) + { + // Forward iteration + start = Clamp(start, 0, axis_size); + } + else + { + // Backward iteration + start = Clamp(start, -1, axis_size - 1); + } return start; } @@ -118,14 +162,14 @@ inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShap // element. ie. So if you were iterating through all elements of a 1D array of // size 4, this function would return 4 as the stop, because it is one past the // "real" indices of 0, 1, 2 & 3. -inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, - int axis, int start_for_axis) +inline int32_t StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, + int32_t axis, int32_t start_for_axis) { const auto end_mask = params.end_mask; const auto shrink_axis_mask = params.shrink_axis_mask; const auto *stop_indices = params.stop_indices; const auto *strides = params.strides; - const int axis_size = static_cast(input_shape.dim(axis).value()); + const int32_t axis_size = static_cast(input_shape.dim(axis).value()); if (axis_size == 0) { return 0; @@ -141,7 +185,7 @@ inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape // already been adjusted for negative indices. 
if (shrink_axis) { - stop = start_for_axis + 1; + return start_for_axis + 1; } // end_mask override @@ -183,37 +227,125 @@ inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape return stop; } -StridedSliceParams BuildStridedSliceParams(const luci::CircleStridedSlice *node) +StridedSliceParams BuildStridedSliceParams(StridedSliceContext *op_context) { StridedSliceParams op_params; - if (kMaxDim < node->rank()) + // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks + // are processed here to update begin_mask, end_mask and the index range. + op_params.begin_mask = 0; + op_params.ellipsis_mask = 0; + op_params.end_mask = 0; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = 0; + + // Count indexes where the new_axis_mask is set but the ellipsis_mask is not. + loco::TensorShape begin_shape = luci::shape_get(op_context->begin).as(); + const uint32_t begin_count = begin_shape.dim(0).value(); + uint32_t num_add_axis = 0; + for (uint32_t i = 0; i < begin_count; ++i) { - INTERNAL_EXN_V("Cannot support StridedSlice rank > ", kMaxDim); + if (!((1 << i) & op_context->params.ellipsis_mask) && + ((1 << i) & op_context->params.new_axis_mask)) + { + num_add_axis++; + } } - auto begin_node = loco::must_cast(node->begin()); - auto end_node = loco::must_cast(node->end()); - auto strides_node = loco::must_cast(node->strides()); + // Calculate the dims of input after adding new axises. + const uint32_t effective_dims = op_context->input_dims + num_add_axis; + + // If begin, end and strides are not fully provided, it means Ellipsis should + // be expanded to multiple dimensions (Ex: for spec [Ellipsis, 2] on a 3D + // input, the Ellipsis should be applied for the first 2 dimensions). Besides, + // If the new_axis_mask and the ellipsis_mask are set at the same index, the + // new_axis_mask will have no effect. 
+ int32_t effective_ellipsis_mask = 0, effective_new_axis_mask = 0; + uint32_t ellipsis_start_idx = effective_dims, expanded_ellipsis = 0; + for (uint32_t i = 0; i < effective_dims;) + { + if ((1 << i) & op_context->params.ellipsis_mask) + { + ellipsis_start_idx = i; + uint32_t ellipsis_end_idx = + std::max(i + 1, std::min(i + 1 + num_add_axis + op_context->input_dims - begin_count, + effective_dims)); + expanded_ellipsis = ellipsis_end_idx - ellipsis_start_idx - 1; + + // Set bit for effective_ellipsis_mask. + for (; i < ellipsis_end_idx; ++i) + { + effective_ellipsis_mask |= (1 << i); + } + continue; + } - uint32_t dims_count = begin_node->size(); + if ((1 << (i - expanded_ellipsis)) & op_context->params.new_axis_mask) + { + effective_new_axis_mask |= (1 << i); + } + ++i; + } - op_params.start_indices_count = dims_count; - op_params.stop_indices_count = dims_count; - op_params.strides_count = dims_count; + // Calculate effective_input_shape and its corresponding begin, end, strides. + loco::TensorShape input_shape = luci::shape_get(op_context->input).as(); + uint32_t added_ellipsis = 0, added_axises = 0; + op_context->effective_input_shape.rank(effective_dims); - for (uint32_t i = 0; i < dims_count; ++i) + for (uint32_t i = 0; i < effective_dims; ++i) { - op_params.start_indices[i] = begin_node->at(i); - op_params.stop_indices[i] = end_node->at(i); - op_params.strides[i] = strides_node->at(i); + if ((1 << i) & effective_ellipsis_mask) + { + // If ellipsis_mask, set the begin_mask and end_mask at that index. + added_ellipsis = std::max(0u, i - ellipsis_start_idx); + op_params.begin_mask |= (1 << i); + op_params.end_mask |= (1 << i); + op_params.strides[i] = 1; + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } + else if ((1 << i) & effective_new_axis_mask) + { + // If new_axis_mask is set, it is equivalent to adding a new dim of 1 to + // input tensor. Store added shape to effective_input_shape. 
+ op_params.start_indices[i] = 0; + op_params.stop_indices[i] = 1; + op_params.strides[i] = 1; + op_context->effective_input_shape.dim(i) = loco::Dimension(1); + added_axises++; + } + else if (i >= begin_count + expanded_ellipsis) + { + op_params.start_indices[i] = 0; + op_params.stop_indices[i] = 0; + op_params.strides[i] = 1; + op_params.begin_mask |= (1 << i); + op_params.end_mask |= (1 << i); + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } + else + { + const uint32_t orig_idx = i - added_ellipsis; + op_params.start_indices[i] = op_context->begin->at(orig_idx); + op_params.stop_indices[i] = op_context->end->at(orig_idx); + op_params.strides[i] = op_context->strides->at(orig_idx); + if (op_context->params.begin_mask & (1 << orig_idx)) + { + op_params.begin_mask |= (1 << i); + } + if (op_context->params.end_mask & (1 << orig_idx)) + { + op_params.end_mask |= (1 << i); + } + if (op_context->params.shrink_axis_mask & (1 << orig_idx)) + { + op_params.shrink_axis_mask |= (1 << i); + } + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } } - - op_params.begin_mask = node->begin_mask(); - op_params.ellipsis_mask = 0; - op_params.end_mask = node->end_mask(); - op_params.new_axis_mask = 0; - op_params.shrink_axis_mask = node->shrink_axis_mask(); + op_params.start_indices_count = effective_dims; + op_params.stop_indices_count = effective_dims; + op_params.strides_count = effective_dims; return op_params; } @@ -241,55 +373,54 @@ loco::TensorShape infer_output_shape(const CircleStridedSlice *node) LUCI_ASSERT(end_node->dtype() == S32, "Only support S32 for end_node"); LUCI_ASSERT(strides_node->dtype() == S32, "Only support S32 for strides_node"); - assert(node->ellipsis_mask() == 0); - assert(node->new_axis_mask() == 0); + LUCI_ASSERT(begin_node->rank() == 1, "Only support rank 1 for begin_node"); + LUCI_ASSERT(end_node->rank() == 1, "Only support rank 1 for end_node"); + LUCI_ASSERT(strides_node->rank() == 
1, "Only support rank 1 for strides_node"); - auto op_params = BuildStridedSliceParams(node); loco::TensorShape input_shape = luci::shape_get(input_node).as(); - uint32_t num_input_axes = input_shape.rank(); - assert(begin_node->size() <= num_input_axes); - assert(end_node->size() <= num_input_axes); - assert(strides_node->size() <= num_input_axes); - for (uint32_t i = 0; i < strides_node->size(); i++) - { - LUCI_ASSERT(strides_node->at(i) != 0, "Stride value has to be non-zero"); - } + assert(begin_node->size() <= input_shape.rank()); + assert(end_node->size() <= input_shape.rank()); + assert(strides_node->size() <= input_shape.rank()); - uint32_t shape_size = 0; - std::array output_shape_data; + StridedSliceContext op_context(node); + auto op_params = BuildStridedSliceParams(&op_context); + auto effective_input_shape = op_context.effective_input_shape; + std::vector output_shape_vector; - for (uint32_t idx = 0; idx < num_input_axes; ++idx) + for (int32_t idx = effective_input_shape.rank() - 1; idx >= 0; --idx) { - int32_t begin = StartForAxis(op_params, input_shape, idx); - int32_t end = StopForAxis(op_params, input_shape, idx, begin); - if (end < 0) - end = input_shape.dim(idx).value() + end + 1; + int32_t stride = op_params.strides[idx]; + LUCI_ASSERT(stride != 0, "stride value has to be non-zero"); - // This is valid for both positive and negative strides - int32_t stride = strides_node->at(idx); - int32_t dim_shape = std::ceil(static_cast(end - begin) / stride); - assert(dim_shape > 0); + int32_t begin = StartForAxis(op_params, effective_input_shape, idx); + int32_t end = StopForAxis(op_params, effective_input_shape, idx, begin); // When shrinking an axis, the end position does not matter (and can be // incorrect when negative indexing is used, see Issue #19260). Always use // begin + 1 to generate a length 1 slice, since begin has - // already been adjusted for negative indices by StartForAxis. 
- const bool shrink_axis = node->shrink_axis_mask() & (1 << idx); + // already been adjusted for negative indices by GetBeginValueAtIndex. + const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx); if (shrink_axis) { - assert(dim_shape == 1); + end = begin + 1; } - else + + // This is valid for both positive and negative strides + int32_t dim_shape = std::ceil((end - begin) / static_cast(stride)); + dim_shape = dim_shape < 0 ? 0 : dim_shape; + if (!shrink_axis) { - output_shape_data[shape_size++] = dim_shape; + output_shape_vector.push_back(dim_shape); } } + auto shape_size = output_shape_vector.size(); output_shape.rank(shape_size); for (uint32_t idx = 0; idx < shape_size; ++idx) { - output_shape.dim(idx) = output_shape_data[idx]; + // reverse copy + output_shape.dim(idx) = output_shape_vector.at(shape_size - 1u - idx); } return output_shape; diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst index 94e723f..09a25ff 100644 --- a/compiler/luci/tests/test.lst +++ b/compiler/luci/tests/test.lst @@ -39,6 +39,7 @@ addread(Conv2D_003) addread(Conv2D_U8_000) addread(Conv2D_U8_001) addread(Cos_000) +addread(Densify_000) addread(DepthToSpace_000) addread(DepthwiseConv2D_000) addread(DepthwiseConv2D_U8_000) @@ -265,6 +266,7 @@ addwrite(Conv2D_003) addwrite(Conv2D_U8_000) addwrite(Conv2D_U8_001) addwrite(Cos_000) +addwrite(Densify_000) addwrite(DepthToSpace_000) addwrite(DepthwiseConv2D_000) addwrite(DepthwiseConv2D_U8_000) diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h index d3ffc23..7a1ba2b 100644 --- a/compiler/mio-circle04/include/mio_circle/Helper.h +++ b/compiler/mio-circle04/include/mio_circle/Helper.h @@ -19,6 +19,8 @@ #include +#include + namespace mio { namespace circle @@ -31,6 +33,21 @@ std::string opcode_name(const ::circle::OperatorCode *opcode); const char *tensor_type(const ::circle::Tensor *tensor); const char *tensor_name(const ::circle::Tensor *tensor); 
+template std::vector as_index_vector(const flatbuffers::Vector *flat_array) +{ + if (flat_array == nullptr) + { + throw std::runtime_error("flat array is nullptr"); + } + + std::vector ret(flat_array->Length()); + for (uint32_t i = 0; i < flat_array->Length(); i++) + { + ret[i] = flat_array->Get(i); + } + return ret; +} + } // namespace circle } // namespace mio diff --git a/compiler/mio-circle04/include/mio_circle/Reader.h b/compiler/mio-circle04/include/mio_circle/Reader.h new file mode 100644 index 0000000..6306467 --- /dev/null +++ b/compiler/mio-circle04/include/mio_circle/Reader.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MIO_CIRCLE04_READER_H__ +#define __MIO_CIRCLE04_READER_H__ + +#include + +#include +#include +#include + +// NOTE Reader class originated from circledump and for circle-tensordump +// where this class has more work to be done for stability +// as the tools are for developers not customores. 
+ +namespace mio +{ +namespace circle +{ + +/** + * @brief Loads Circle file and provides helpers to access attributes + */ +class Reader +{ +private: + using CircleSubGraphs_t = flatbuffers::Vector>; + using CircleBuffers_t = flatbuffers::Vector>; + using CircleTensors_t = flatbuffers::Vector>; + using CircleOperators_t = flatbuffers::Vector>; + using CircleMetadata_t = flatbuffers::Vector>; + using CircleSignatureDef_t = flatbuffers::Vector>; + +public: + Reader(const ::circle::Model *model); + + Reader() = delete; + +public: + uint32_t version() const { return _version; } + + const std::vector &opcodes() { return _op_codes; } + const CircleBuffers_t *buffers() { return _buffers; } + const CircleTensors_t *tensors() { return _tensors; } + const CircleOperators_t *operators() { return _operators; } + const std::vector &inputs() const { return _inputs; } + const std::vector &outputs() const { return _outputs; } + const ::circle::DataFormat &data_format() const { return _data_format; } + const CircleMetadata_t *metadata() const { return _metadata; } + const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } + + uint32_t num_subgraph() const { return _subgraphs->Length(); } + + size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); + ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const; + std::string opcode_name(const ::circle::Operator *op) const; + std::vector outputs(const ::circle::Operator *op) const; + std::string tensor_name(const ::circle::Tensor *tensor) const; + std::string tensor_dtype(const ::circle::Tensor *tensor) const; + +public: + bool select_subgraph(uint32_t subgraph); + const std::string &subgraph_name(void) const { return _subgraph_name; } + uint32_t subgraph_index(void) const { return _subgraph_index; } + +private: + uint32_t _version; + + const CircleSubGraphs_t *_subgraphs{nullptr}; + const CircleBuffers_t *_buffers{nullptr}; + const CircleTensors_t *_tensors{nullptr}; + const 
CircleOperators_t *_operators{nullptr}; + const CircleMetadata_t *_metadata{nullptr}; + const CircleSignatureDef_t *_signature_defs{nullptr}; + + uint32_t _subgraph_index = 0; + std::string _subgraph_name; + std::vector _op_codes; + std::vector _inputs; + std::vector _outputs; + ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST; +}; + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE04_READER_H__ diff --git a/compiler/mio-circle04/src/Reader.cpp b/compiler/mio-circle04/src/Reader.cpp new file mode 100644 index 0000000..880ffae --- /dev/null +++ b/compiler/mio-circle04/src/Reader.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Reader.h" +#include "mio_circle/Helper.h" + +#include +#include + +namespace mio +{ +namespace circle +{ + +Reader::Reader(const ::circle::Model *model) +{ + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _version = model->version(); + _subgraphs = model->subgraphs(); + _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); + + auto opcodes = model->operator_codes(); + for (const ::circle::OperatorCode *opcode : *opcodes) + { + _op_codes.push_back(opcode); + } +} + +size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) +{ + if (buff_data != nullptr) + { + *buff_data = nullptr; + } + + if (buf_idx == 0) + return 0; + + if (auto *buffer = (*_buffers)[buf_idx]) + { + if (auto *array = buffer->data()) + { + if (size_t size = array->size()) + { + if (buff_data != nullptr) + { + *buff_data = reinterpret_cast(array->data()); + } + return size; + } + } + } + + return 0; +} + +::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + return mio::circle::builtin_code_neutral(opcode); +} + +std::string Reader::opcode_name(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + if (!mio::circle::is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid: " << index << ")"; + return oss.str(); + } + + return mio::circle::opcode_name(opcode); +} + +std::vector Reader::outputs(const ::circle::Operator *op) const +{ + return as_index_vector(op->outputs()); +} + +std::string Reader::tensor_name(const ::circle::Tensor *tensor) const +{ + return mio::circle::tensor_name(tensor); +} + +std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const +{ + 
return mio::circle::tensor_type(tensor); +} + +bool Reader::select_subgraph(uint32_t sgindex) +{ + _subgraph_index = sgindex; + _tensors = nullptr; + _operators = nullptr; + + _inputs.clear(); + _outputs.clear(); + + if (_subgraphs->Length() <= sgindex) + { + assert(false); + return false; + } + + const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; + + auto name = subgraph->name(); + _subgraph_name = name ? name->c_str() : "(noname)"; + + _tensors = subgraph->tensors(); + _operators = subgraph->operators(); + _data_format = subgraph->data_format(); + + _inputs = as_index_vector(subgraph->inputs()); + _outputs = as_index_vector(subgraph->outputs()); + + return true; +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle04/src/Reader.test.cpp b/compiler/mio-circle04/src/Reader.test.cpp new file mode 100644 index 0000000..104454a --- /dev/null +++ b/compiler/mio-circle04/src/Reader.test.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Reader.h" + +#include +#include + +class mio_circle04_reader_test : public ::testing::Test +{ +protected: + void initialization_emty(void) + { + _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec); + circle::FinishModelBuffer(_fbb, _model); + } + + const circle::Model *circleModel(void) + { + auto ptr = _fbb.GetBufferPointer(); + return circle::GetModel(ptr); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + flatbuffers::Offset _model; + std::vector> _opcodes_vec; +}; + +TEST_F(mio_circle04_reader_test, null_Model_NEG) +{ + EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error); +} + +TEST_F(mio_circle04_reader_test, empty_Model) +{ + initialization_emty(); + + const circle::Model *model = circleModel(); + EXPECT_NE(nullptr, model); + + mio::circle::Reader reader(model); + + SUCCEED(); +} + +// TODO add more tests diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md index 187b1a5..c717ab8 100644 --- a/compiler/mio-tflite/README.md +++ b/compiler/mio-tflite/README.md @@ -1,3 +1,5 @@ # mio-tflite _mio-tflite_ provides a library to access TensorFlow lite model files + +NOTE: _mio-tflite_ is currently obsolete diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md index 970569b..86d2998 100644 --- a/compiler/mio-tflite260/README.md +++ b/compiler/mio-tflite260/README.md @@ -1,3 +1,5 @@ # mio-tflite260 _mio-tflite260_ provides a library to access TensorFlow lite model files with V2.6.0. + +NOTE: _mio-tflite260_ is currently obsolete diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h index bf94cfb..37bfdb3 100644 --- a/compiler/mir/include/mir/Graph.h +++ b/compiler/mir/include/mir/Graph.h @@ -103,6 +103,10 @@ private: /** * @brief Returns nodes of the graph sorted topologically. 
+ * @note Sorting order priority + * 1) Graph input node (input index order) + * 2) Constant node (unordered - cannot predict order) + * 3) Ready node (unordered - cannot predict order) */ std::vector getSortedNodes(Graph *graph); diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp index 04b005d..05d6dc9 100644 --- a/compiler/mir/src/Graph.cpp +++ b/compiler/mir/src/Graph.cpp @@ -44,9 +44,16 @@ std::vector getSortedNodes(Graph *graph) std::deque ready_nodes; std::unordered_map num_visited_input_edges; + // Use input vector first to maintain correct input order + for (Operation *op : graph->getInputs()) + { + ready_nodes.push_back(op); + } + for (Operation *op : graph->getNodes()) { - if (op->getNumInputs() == 0) + // Skip already pushed input node + if ((op->getNumInputs() == 0) && (op->getType() != Operation::Type::input)) { ready_nodes.push_back(op); } diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp index 92ab994..244c92a 100644 --- a/compiler/mir2loco/src/mir2loco.test.cpp +++ b/compiler/mir2loco/src/mir2loco.test.cpp @@ -383,28 +383,49 @@ TEST_F(TestTransformer_mir2loco, Conv2D_Test) auto loco_graph = transformer.transform(&mir_graph); loco::Pull *pull_node = dynamic_cast(loco_graph->nodes()->at(0)); - loco::ConstGen *const_node = dynamic_cast(loco_graph->nodes()->at(1)); - loco::FeatureEncode *encode_node = - dynamic_cast(loco_graph->nodes()->at(2)); - loco::FilterEncode *filter_node = dynamic_cast(loco_graph->nodes()->at(3)); - loco::Conv2D *conv_node = dynamic_cast(loco_graph->nodes()->at(4)); - loco::FeatureDecode *decode_node = - dynamic_cast(loco_graph->nodes()->at(5)); - loco::Push *push_node = dynamic_cast(loco_graph->nodes()->at(6)); - ASSERT_NE(pull_node, nullptr); + + // ConstGen: Only one ConstGen node + // We can convince that this node is input of FilterEncode because this is only ConstGen node + loco::ConstGen *const_node = dynamic_cast(loco_graph->nodes()->at(1)); 
ASSERT_NE(const_node, nullptr); - ASSERT_NE(filter_node, nullptr); + + // FeatureEncode + auto pull_uses = loco::succs(pull_node); + ASSERT_EQ(pull_uses.size(), 1); + loco::FeatureEncode *encode_node = dynamic_cast(*pull_uses.begin()); ASSERT_NE(encode_node, nullptr); - ASSERT_NE(conv_node, nullptr); - ASSERT_NE(decode_node, nullptr); - ASSERT_NE(push_node, nullptr); ASSERT_EQ(encode_node->input(), pull_node); - ASSERT_EQ(filter_node->input(), const_node); + + // Conv2D + auto encode_uses = loco::succs(encode_node); + ASSERT_EQ(encode_uses.size(), 1); + loco::Conv2D *conv_node = dynamic_cast(*encode_uses.begin()); + ASSERT_NE(conv_node, nullptr); ASSERT_EQ(conv_node->ifm(), encode_node); + + // FilterEncode + auto const_uses = loco::succs(const_node); + ASSERT_EQ(const_uses.size(), 1); + loco::FilterEncode *filter_node = dynamic_cast(*const_uses.begin()); + ASSERT_NE(filter_node, nullptr); + ASSERT_EQ(filter_node->input(), const_node); ASSERT_EQ(conv_node->ker(), filter_node); + + // FeatureDecode + auto conv_uses = loco::succs(conv_node); + ASSERT_EQ(conv_uses.size(), 1); + loco::FeatureDecode *decode_node = dynamic_cast(*conv_uses.begin()); + ASSERT_NE(decode_node, nullptr); ASSERT_EQ(decode_node->input(), conv_node); + + // Push + auto decode_uses = loco::succs(decode_node); + ASSERT_EQ(decode_uses.size(), 1); + loco::Push *push_node = dynamic_cast(*decode_uses.begin()); + ASSERT_NE(push_node, nullptr); ASSERT_EQ(push_node->from(), decode_node); + // Check params ASSERT_EQ(conv_node->pad()->top(), 5); ASSERT_EQ(conv_node->pad()->left(), 9); diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp index 333f0f6..0659fd1 100644 --- a/compiler/moco/import/src/Importer.cpp +++ b/compiler/moco/import/src/Importer.cpp @@ -190,7 +190,7 @@ std::unique_ptr Importer::import(const ModelSignature &signature, convert_graph(*source_ptr, signature, tf_graph_def, graph.get()); - return std::move(graph); + return graph; } } // namespace moco 
diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp index 55c0e0c..b59a505 100644 --- a/compiler/moco/lang/src/IR/TFNode.cpp +++ b/compiler/moco/lang/src/IR/TFNode.cpp @@ -17,6 +17,7 @@ #include "moco/IR/TFNode.h" #include "moco/IR/TFDialect.h" +#include #include #include diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt index 8732340..90e989a 100644 --- a/compiler/one-cmds/CMakeLists.txt +++ b/compiler/one-cmds/CMakeLists.txt @@ -8,7 +8,9 @@ set(ONE_COMMAND_FILES one-optimize one-quantize one-pack + one-partition one-profile + one-infer one-codegen one-prepare-venv onecc @@ -74,7 +76,11 @@ endforeach(ONE_UTILITY) # make python directory set(ONE_PYTHON_FILES constant.py - make_cmd.py) + make_cmd.py + CfgRunner.py + OptionBuilder.py + TopologicalSortHelper.py + WorkflowRunner.py) foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES}) diff --git a/compiler/one-cmds/dummy-driver/CMakeLists.txt b/compiler/one-cmds/dummy-driver/CMakeLists.txt index 690a607..2552a02 100644 --- a/compiler/one-cmds/dummy-driver/CMakeLists.txt +++ b/compiler/one-cmds/dummy-driver/CMakeLists.txt @@ -1,16 +1,25 @@ # dummy driver for interface test set(DUMMY_DRIVER_SRC src/dummy-compile.cpp) set(HELP_DRIVER_SRC src/help-compile.cpp) +set(DUMMY_INFER_SRC src/dummy-infer.cpp) +set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp) +set(HELP_INFER_SRC src/help-infer.cpp) set(DUMMY_PROFILE_SRC src/dummy-profile.cpp) set(HELP_PROFILE_SRC src/help-profile.cpp) add_executable(dummy-compile ${DUMMY_DRIVER_SRC}) add_executable(help-compile ${HELP_DRIVER_SRC}) +add_executable(dummy-infer ${DUMMY_INFER_SRC}) +add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC}) +add_executable(help-infer ${HELP_INFER_SRC}) add_executable(dummy-profile ${DUMMY_PROFILE_SRC}) add_executable(help-profile ${HELP_PROFILE_SRC}) set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile") set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile") +set(DUMMY_INFER 
"${CMAKE_CURRENT_BINARY_DIR}/dummy-infer") +set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2") +set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer") set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile") set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile") @@ -26,6 +35,24 @@ install(FILES ${HELP_DRIVER} WORLD_READ WORLD_EXECUTE DESTINATION test) +install(FILES ${DUMMY_INFER} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + +install(FILES ${DUMMY_INFER_V2} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + +install(FILES ${HELP_INFER} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + install(FILES ${DUMMY_PROFILE} PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE diff --git a/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp new file mode 100644 index 0000000..60f5fae --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * dummy-infer only tests its interface rather than its functionality. 
+ * + * ./dummy-infer ${INPUT_NAME} + * dummy-infer dummy output!!! + */ + +#include + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::cout << "dummy-infer dummy output!!!" << std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp new file mode 100644 index 0000000..4b93c70 --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * dummy-infer only tests its interface rather than its functionality. + * + * ./dummy-infer ${INPUT_NAME} + * Do inference of ${INPUT_NAME} + */ + +#include + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::cout << "Do inference of " + std::string(argv[1]) << std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/dummy-driver/src/help-infer.cpp b/compiler/one-cmds/dummy-driver/src/help-infer.cpp new file mode 100644 index 0000000..821d368 --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/help-infer.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * help-infer prints dummy help message. + * + * $ ./help-infer -h + * HELP MESSAGE!! + */ + +#include +#include +#include + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::string opt_h{"-h"}; + std::string argv_1{argv[1]}; + + if (opt_h != argv_1) + return EXIT_FAILURE; + + std::cout << "HELP MESSAGE!!" << std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index ebc1651..2352bbd 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -153,6 +153,7 @@ Current transformation options are - expand_broadcast_const : This will expand broadcastable constant node inputs - fold_add_v2 : This removes AddV2 operation which can be folded - fold_cast : This removes Cast operation which can be folded +- fold_densify: This removes Densify operator which can be folded - fold_dequantize : This removes Dequantize operation which can be folded - fold_dwconv : This folds Depthwise Convolution operation which can be folded - fold_gather : This removes Gather operation which can be folded @@ -205,10 +206,6 @@ Current transformation options are - transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator. - transform_min_relu_to_relu6: This will transform Minimum(6)-Relu pattern to Relu6 operator. -There are options to enable multiple options at once for convenience. 
-- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul, - resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape - one-quantize ------------ diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build index 5c313b4..4b1f980 100644 --- a/compiler/one-cmds/one-build +++ b/compiler/one-cmds/one-build @@ -22,7 +22,6 @@ import argparse import configparser import os -import subprocess import sys import utils as _utils @@ -83,6 +82,7 @@ def _get_driver_name(driver_name): 'one-import-onnx': 'one-import-onnx', 'one-optimize': 'one-optimize', 'one-quantize': 'one-quantize', + 'one-partition': 'one-partition', 'one-pack': 'one-pack', 'one-codegen': 'one-codegen' }[driver_name] @@ -157,7 +157,8 @@ def main(): bin_dir = os.path.dirname(os.path.realpath(__file__)) import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) transform_drivers = [ - 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' + 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile', + 'one-partition' ] _verify_cfg(import_drivers_dict, config) diff --git a/compiler/one-cmds/one-build.template.cfg b/compiler/one-cmds/one-build.template.cfg index e147896..4296081 100644 --- a/compiler/one-cmds/one-build.template.cfg +++ b/compiler/one-cmds/one-build.template.cfg @@ -5,6 +5,7 @@ one-import-bcq=False one-import-onnx=False one-optimize=True one-quantize=False +one-partition=False one-pack=True one-codegen=False diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen index 726538d..86e1632 100644 --- a/compiler/one-cmds/one-codegen +++ b/compiler/one-cmds/one-codegen @@ -25,9 +25,7 @@ import glob import itertools import ntpath import os -import subprocess import sys -import tempfile import shutil import utils as _utils diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq index ef89a92..c3ef0b2 100644 --- a/compiler/one-cmds/one-import-bcq +++ 
b/compiler/one-cmds/one-import-bcq @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import tempfile @@ -160,9 +159,9 @@ def _convert(args): tmpdir, os.path.splitext( os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite' - tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, - generate_bcq_metadata_output_path, - tf2tfliteV2_output_path) + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd( + args, tf2tfliteV2_path, generate_bcq_metadata_output_path, + tf2tfliteV2_output_path) try: output_arrays_idx = tf2tfliteV2_cmd.index('--output_arrays') tf2tfliteV2_cmd[output_arrays_idx + 1] = ','.join(bcq_output_arrays) @@ -177,8 +176,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx index eaa1361..ad19c2f 100644 --- a/compiler/one-cmds/one-import-onnx +++ b/compiler/one-cmds/one-import-onnx @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import tempfile import onnx @@ -80,6 +79,12 @@ def _get_parser(): parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') parser.add_argument( '--unroll_lstm', action='store_true', help='Unroll LSTM operators') + parser.add_argument( + '--keep_io_order', + action='store_true', + help= + 'Ensure generated circle model preserves the I/O order of the original onnx model.' 
+ ) # save intermediate file(s) parser.add_argument( @@ -87,6 +92,12 @@ def _get_parser(): action='store_true', help='Save intermediate files to output folder') + # experimental options + parser.add_argument( + '--experimental_disable_batchmatmul_unfold', + action='store_true', + help='Experimental disable BatchMatMul unfold') + return parser @@ -124,6 +135,65 @@ def _apply_verbosity(verbosity): os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +# The index of input/output is added in front of the name. For example, +# Original input names: 'a', 'c', 'b' +# Renamed: '0001_a', '0002_c', '0003_b' +# This will preserve I/O order after import. +def _remap_io_names(onnx_model): + # gather existing name of I/O and generate new name of I/O in sort order + input_nodes = [] + output_nodes = [] + remap_inputs = [] + remap_outputs = [] + initializers = [] + # some models may have initializers as inputs. ignore them. + for initializer in onnx_model.graph.initializer: + initializers.append(initializer.name) + for idx in range(0, len(onnx_model.graph.input)): + name = onnx_model.graph.input[idx].name + if not name in initializers: + input_nodes.append(name) + remap_inputs.append(format(idx + 1, '04d') + '_' + name) + for idx in range(0, len(onnx_model.graph.output)): + name = onnx_model.graph.output[idx].name + output_nodes.append(name) + remap_outputs.append(format(idx + 1, '04d') + '_' + name) + # change names for graph input + for i in range(len(onnx_model.graph.input)): + if onnx_model.graph.input[i].name in input_nodes: + to_rename = onnx_model.graph.input[i].name + idx = input_nodes.index(to_rename) + onnx_model.graph.input[i].name = remap_inputs[idx] + # change names of all nodes in the graph + for i in range(len(onnx_model.graph.node)): + # check node.input is to change to remap_inputs or remap_outputs + for j in range(len(onnx_model.graph.node[i].input)): + if onnx_model.graph.node[i].input[j] in input_nodes: + to_rename = onnx_model.graph.node[i].input[j] + idx = 
input_nodes.index(to_rename) + onnx_model.graph.node[i].input[j] = remap_inputs[idx] + if onnx_model.graph.node[i].input[j] in output_nodes: + to_rename = onnx_model.graph.node[i].input[j] + idx = output_nodes.index(to_rename) + onnx_model.graph.node[i].input[j] = remap_outputs[idx] + # check node.output is to change to remap_inputs or remap_outputs + for j in range(len(onnx_model.graph.node[i].output)): + if onnx_model.graph.node[i].output[j] in output_nodes: + to_rename = onnx_model.graph.node[i].output[j] + idx = output_nodes.index(to_rename) + onnx_model.graph.node[i].output[j] = remap_outputs[idx] + if onnx_model.graph.node[i].output[j] in input_nodes: + to_rename = onnx_model.graph.node[i].output[j] + idx = input_nodes.index(to_rename) + onnx_model.graph.node[i].output[j] = remap_inputs[idx] + # change names for graph output + for i in range(len(onnx_model.graph.output)): + if onnx_model.graph.output[i].name in output_nodes: + to_rename = onnx_model.graph.output[i].name + idx = output_nodes.index(to_rename) + onnx_model.graph.output[i].name = remap_outputs[idx] + + def _convert(args): _apply_verbosity(args.verbose) @@ -142,6 +212,13 @@ def _convert(args): options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn') options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm') onnx_legalizer.legalize(onnx_model, options) + if _utils._is_valid_attr(args, 'keep_io_order'): + _remap_io_names(onnx_model) + if _utils._is_valid_attr(args, 'save_intermediate'): + basename = os.path.basename(getattr(args, 'input_path')) + fixed_path = os.path.join(tmpdir, + os.path.splitext(basename)[0] + '~.onnx') + onnx.save(onnx_model, fixed_path) tf_savedmodel = onnx_tf.backend.prepare(onnx_model) savedmodel_name = os.path.splitext(os.path.basename( @@ -166,8 +243,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - 
tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch index dbf1ba6..7f39e61 100644 --- a/compiler/one-cmds/one-import-pytorch +++ b/compiler/one-cmds/one-import-pytorch @@ -80,7 +80,8 @@ def _get_parser(): tf2tflite_group.add_argument('--converter_version', default='v2') parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') - parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators') + parser.add_argument( + '--unroll_lstm', action='store_true', help='Unroll LSTM operators') # save intermediate file(s) parser.add_argument( @@ -338,8 +339,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index 999255a..6623fa6 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -21,8 +21,6 @@ import argparse import os -import subprocess -import sys import tempfile import onelib.make_cmd as _make_cmd @@ -152,8 +150,8 @@ def _convert(args): tmpdir, os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite' tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, - getattr(args, 'input_path'), - tf2tfliteV2_output_path) + getattr(args, 'input_path'), + tf2tfliteV2_output_path) f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode()) @@ -163,8 +161,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 
'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite index 2d756bf..3d96b11 100644 --- a/compiler/one-cmds/one-import-tflite +++ b/compiler/one-cmds/one-import-tflite @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import onelib.make_cmd as _make_cmd @@ -83,8 +82,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - getattr(args, 'input_path'), - getattr(args, 'output_path')) + getattr(args, 'input_path'), + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-infer b/compiler/one-cmds/one-infer new file mode 100644 index 0000000..c7fcd8a --- /dev/null +++ b/compiler/one-cmds/one-infer @@ -0,0 +1,224 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import copy +import glob +import itertools +import ntpath +import os +import sys + +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +def _get_backends_list(): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-infer` finds its backends + - `bin` folder where `one-infer` exists + - `backends` folder + + NOTE If there are backends of the same name in different places, + the closer to the top in the list, the higher the priority. + """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + backend_set = set() + + # bin folder + files = [f for f in glob.glob(dir_path + '/*-infer')] + # backends folder + files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)] + # TODO find backends in `$PATH` + + backends_list = [] + for cand in files: + base = ntpath.basename(cand) + if (not base in backend_set) and os.path.isfile(cand) and os.access( + cand, os.X_OK): + backend_set.add(base) + backends_list.append(cand) + + return backends_list + + +def _search_backend_driver(driver): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-infer` finds its backend driver + - `bin` folder where `one-infer` exists + - `backends/**/bin/` folder + + NOTE If there are drivers of the same name in different places, + the closer to the top in the list, the higher the priority. 
+ """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + + # CASE 1: one/bin/{driver} is found + driver_path = dir_path + '/' + driver + if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK): + return driver_path + + # CASE 2: one/backends/**/bin/{driver} is found + for driver_path in glob.glob( + dir_path + '/../backends/**/bin/' + driver, recursive=True): + if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK): + return driver_path + + # CASE 3: {driver} is found in nowhere + return None + + +def _get_parser(backends_list): + infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]' + parser = argparse.ArgumentParser( + description='command line tool to infer model', usage=infer_usage) + + _utils._add_default_arg(parser) + + # TODO: add tflite/onnx-infer driver to helper message when it is implemented + driver_help_message = 'backend inference driver name to execute' + parser.add_argument('-d', '--driver', type=str, help=driver_help_message) + + # get backend list in the directory + backends_name = [ntpath.basename(f) for f in backends_list] + if not backends_name: + backends_name_message = '(There is no available backend drivers)' + else: + backends_name_message = '(available backend drivers: ' + ', '.join( + backends_name) + ')' + backend_help_message = 'backend name to use ' + backends_name_message + parser.add_argument('-b', '--backend', type=str, help=backend_help_message) + + post_process_help_message = 'post processing script to convert I/O data to standard format' + parser.add_argument('--post-process', type=str, help=post_process_help_message) + + return parser + + +def _verify_arg(parser, args): + """verify given arguments""" + # `-d/--driver` and `-b/--backend` are mutually exclusive arguments. + if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'): + parser.error( + '-d and -b options are mutually exclusive. 
Please use only one of them') + + missing = [] + if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr( + args, 'backend'): + missing.append('{-d/--driver | -b/--backend}') + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + + +def _parse_arg(parser): + infer_args = [] + backend_args = [] + argv = copy.deepcopy(sys.argv) + # delete file name + del argv[0] + # split by '--' + args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x] + + # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER] + if len(args): + infer_args = args[0] + infer_args = parser.parse_args(infer_args) + backend_args = backend_args if len(args) < 2 else args[1] + # print version + if len(args) and infer_args.version: + _utils._print_version_and_exit(__file__) + + return infer_args, backend_args + + +def _get_executable(args, backends_list): + driver = _utils._is_valid_attr(args, 'driver') + if driver: + executable = _search_backend_driver(driver) + if executable: + return executable + else: + raise FileNotFoundError(driver + ' not found') + + if _utils._is_valid_attr(args, 'backend'): + backend_base = getattr(args, 'backend') + '-infer' + for cand in backends_list: + if ntpath.basename(cand) == backend_base: + return cand + raise FileNotFoundError(backend_base + ' not found') + + +def main(): + # get backend list + backends_list = _get_backends_list() + + # parse arguments + parser = _get_parser(backends_list) + args, backend_args = _parse_arg(parser) + + # parse configuration file + _utils._parse_cfg(args, 'one-infer') + + # verify arguments + _verify_arg(parser, args) + + # make a command to run given backend driver + driver_path = _get_executable(args, backends_list) + infer_cmd = [driver_path] + backend_args + if _utils._is_valid_attr(args, 'command'): + infer_cmd += getattr(args, 'command').split() + + # run backend driver + 
_utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path)) + + # run post process script if it's given + if _utils._is_valid_attr(args, 'post_process'): + # NOTE: the given python script will be executed by venv of ONE + python_path = sys.executable + post_process_command = [python_path] + getattr(args, + 'post_process').strip().split(' ') + _utils._run(post_process_command, err_prefix='one-infer') + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-init b/compiler/one-cmds/one-init new file mode 100644 index 0000000..04c4534 --- /dev/null +++ b/compiler/one-cmds/one-init @@ -0,0 +1,280 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import copy +import glob +import itertools +import ntpath +import os +import sys + +import configparser +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +class CommentableConfigParser(configparser.ConfigParser): + """ + ConfigParser where comment can be stored + In Python ConfigParser, comment in ini file ( starting with ';') is considered a key of which + value is None. + Ref: https://stackoverflow.com/questions/6620637/writing-comments-to-files-with-configparser + """ + + def __init__(self): + # allow_no_value=True to add comment + # ref: https://stackoverflow.com/a/19432072 + configparser.ConfigParser.__init__(self, allow_no_value=True) + self.optionxform = str + + def add_comment(self, section, comment): + comment_sign = ';' + self[section][f'{comment_sign} {comment}'] = None + + +def _get_backends_list(): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-init` finds its backends + - `bin` folder where `one-init` exists + - `backends` folder + + NOTE If there are backends of the same name in different places, + the closer to the top in the list, the higher the priority. 
+ """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + backend_set = set() + + # bin folder + files = [f for f in glob.glob(dir_path + '/*-init')] + # backends folder + files += [f for f in glob.glob(dir_path + '/../backends/**/*-init', recursive=True)] + # TODO find backends in `$PATH` + + backends_list = [] + for cand in files: + base = ntpath.basename(cand) + if (not base in backend_set) and os.path.isfile(cand) and os.access( + cand, os.X_OK): + backend_set.add(base) + backends_list.append(cand) + + return backends_list + + +# TODO Add support for TF graphdef and bcq +def _get_parser(backends_list): + init_usage = ( + 'one-init [-h] [-v] [-V] ' + '[-i INPUT_PATH] ' + '[-o OUTPUT_PATH] ' + '[-m MODEL_TYPE] ' + '[-b BACKEND] ' + # args for onnx model + '[--convert_nchw_to_nhwc] ' + '[--nchw_to_nhwc_input_shape] ' + '[--nchw_to_nhwc_output_shape] ' + # args for backend driver + '[--] [COMMANDS FOR BACKEND DRIVER]') + """ + NOTE + layout options for onnx model could be difficult to users. + In one-init, we could consider easier args for the the above three: + For example, we could have another option, e.g., --input_img_layout LAYOUT + - When LAYOUT is NHWC, apply 'nchw_to_nhwc_input_shape=True' into cfg + - When LAYOUT is NCHW, apply 'nchw_to_nhwc_input_shape=False' into cfg + """ + + parser = argparse.ArgumentParser( + description='Command line tool to generate initial cfg file. ' + 'Currently tflite and onnx models are supported', + usage=init_usage) + + _utils._add_default_arg_no_CS(parser) + + parser.add_argument( + '-i', '--input_path', type=str, help='full filepath of the input model file') + parser.add_argument( + '-o', '--output_path', type=str, help='full filepath of the output cfg file') + parser.add_argument( + '-m', + '--model_type', + type=str, + help=('type of input model: "onnx", "tflite". 
' + 'If the file extension passed to --input_path is ' + '".tflite" or ".onnx", this arg can be omitted.')) + + onnx_group = parser.add_argument_group('arguments when model type is onnx') + onnx_group.add_argument( + '--convert_nchw_to_nhwc', + action='store_true', + help= + 'Convert NCHW operators to NHWC under the assumption that input model is NCHW.') + onnx_group.add_argument( + '--nchw_to_nhwc_input_shape', + action='store_true', + help='Convert the input shape of the model (argument for convert_nchw_to_nhwc)') + onnx_group.add_argument( + '--nchw_to_nhwc_output_shape', + action='store_true', + help='Convert the output shape of the model (argument for convert_nchw_to_nhwc)') + + # get backend list in the directory + backends_name = [ntpath.basename(f) for f in backends_list] + if not backends_name: + backends_name_message = '(There is no available backend drivers)' + else: + backends_name_message = '(available backend drivers: ' + ', '.join( + backends_name) + ')' + backend_help_message = 'backend name to use ' + backends_name_message + parser.add_argument('-b', '--backend', type=str, help=backend_help_message) + + return parser + + +def _verify_arg(parser, args): + # check if required arguments is given + missing = [] + if not _utils._is_valid_attr(args, 'input_path'): + missing.append('-i/--input_path') + if not _utils._is_valid_attr(args, 'output_path'): + missing.append('-o/--output_path') + if not _utils._is_valid_attr(args, 'backend'): + missing.append('-b/--backend') + + if _utils._is_valid_attr(args, 'model_type'): + # TODO Support model types other than onnx and tflite (e.g., TF) + if getattr(args, 'model_type') not in ['onnx', 'tflite']: + parser.error('Allowed value for --model_type: "onnx" or "tflite"') + + if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'): + if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'): + missing.append('--convert_nchw_to_nhwc') + if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'): + if not 
_utils._is_valid_attr(args, 'convert_nchw_to_nhwc'): + missing.append('--convert_nchw_to_nhwc') + + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + + +def _parse_arg(parser): + init_args = [] + backend_args = [] + argv = copy.deepcopy(sys.argv) + # delete file name + del argv[0] + # split by '--' + args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x] + + # one-init [-h] [-v] ... + if len(args): + init_args = args[0] + init_args = parser.parse_args(init_args) + backend_args = backend_args if len(args) < 2 else args[1] + # print version + if len(args) and init_args.version: + _utils._print_version_and_exit(__file__) + + return init_args, backend_args + + +def _get_executable(args, backends_list): + if _utils._is_valid_attr(args, 'backend'): + backend_base = getattr(args, 'backend') + '-init' + for cand in backends_list: + if ntpath.basename(cand) == backend_base: + return cand + raise FileNotFoundError(backend_base + ' not found') + + +# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354) +def _generate(): + # generate cfg file + config = CommentableConfigParser() + + def _add_onecc_sections(): + pass # NYI + + def _gen_import(): + pass # NYI + + def _gen_optimize(): + pass # NYI + + def _gen_quantize(): + pass # NYI + + def _gen_codegen(): + pass # NYI + + # + # NYI: one-profile, one-partition, one-pack, one-infer + # + + _add_onecc_sections() + + _gen_import() + _gen_optimize() + _gen_quantize() + _gen_codegen() + + with open(args.output_path, 'w') as f: + config.write(f) + + +def main(): + # get backend list + backends_list = _get_backends_list() + + # parse arguments + parser = _get_parser(backends_list) + args, backend_args = _parse_arg(parser) + + # verify arguments + _verify_arg(parser, args) + + # make a command to run given backend driver + driver_path = _get_executable(args, backends_list) + init_cmd = [driver_path] + backend_args + + # run backend driver + 
_utils._run(init_cmd, err_prefix=ntpath.basename(driver_path)) + + #TODO generate cfg file + + raise NotImplementedError("NYI") + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index 8b1f3f7..481fc84 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import onelib.constant as _constant @@ -83,6 +82,14 @@ def _verify_arg(parser, args): if len(missing): parser.error('the following arguments are required: ' + ' '.join(missing)) + # default has pre-defined optimization options + default = _get_parser().parse_args() + + # check if unrecognized arguments are given + diff = set(dir(args)) - set(dir(default)) + if len(diff): + parser.error('the following arguments are unrecognized: ' + ' '.join(diff)) + def _parse_arg(parser): args = parser.parse_args() @@ -102,8 +109,8 @@ def _optimize(args): # make a command to optimize circle model circle2circle_path = os.path.join(dir_path, 'circle2circle') circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path, - getattr(args, 'input_path'), - getattr(args, 'output_path')) + getattr(args, 'input_path'), + getattr(args, 'output_path')) # verbose if _utils._is_valid_attr(args, 'verbose'): diff --git a/compiler/one-cmds/one-pack b/compiler/one-cmds/one-pack index 133207d..5cab7c7 100644 --- a/compiler/one-cmds/one-pack +++ b/compiler/one-cmds/one-pack @@ -21,9 +21,7 @@ import argparse import os -import subprocess import sys -import tempfile import utils as _utils diff --git a/compiler/one-cmds/one-partition b/compiler/one-cmds/one-partition new file mode 100644 index 0000000..c0d71e5 --- /dev/null +++ b/compiler/one-cmds/one-partition @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # 
''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import configparser +import os +import sys + +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +def _get_parser(): + parser = argparse.ArgumentParser( + description='command line tool to partition circle model by multiple backends') + + _utils._add_default_arg(parser) + + parser.add_argument( + '--backends', type=str, help='backends in CSV to use for partitioning') + parser.add_argument('--default', type=str, help='default backend to assign') + + parser.add_argument( + '--part_file', type=str, help='partition file which provides backend to assign') + parser.add_argument('--input_file', type=str, help='input circle model filename') + parser.add_argument( + '--work_path', + type=str, + help='work path of partition, input files exist and output files are produced') + + return parser + + +def _parse_arg(parser): + args = parser.parse_args() + # print version + if args.version: + _utils._print_version_and_exit(__file__) + + return args + + +def _verify_arg(parser, args): + """verify given arguments""" + # check if required arguments is given + missing = [] + if not _utils._is_valid_attr(args, 
'part_file'): + missing.append('part_file') + if not _utils._is_valid_attr(args, 'input_file'): + missing.append('input_file') + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + return + + +def _partition(args): + # get file path to log + bin_path = os.path.dirname(os.path.realpath(__file__)) + cur_path = os.getcwd() + partition_path = os.path.join(cur_path, args.part_file) + logfile_path = partition_path + '.log' + + with open(logfile_path, 'wb', buffering=0) as f: + # make a command to partition circle model with circle-partitioner + circle_partitioner_path = os.path.join(bin_path, 'circle-partitioner') + + cmd = [os.path.expanduser(circle_partitioner_path)] + + if _utils._is_valid_attr(args, 'backends'): + cmd.append('--backends') + cmd.append(getattr(args, 'backends')) + if _utils._is_valid_attr(args, 'default'): + cmd.append('--default') + cmd.append(getattr(args, 'default')) + if _utils._is_valid_attr(args, 'work_path'): + cmd.append('--work_path') + cmd.append(getattr(args, 'work_path')) + + cmd.append('--part_file') + cmd.append(args.part_file) + cmd.append('--input_file') + cmd.append(args.input_file) + + f.write((' '.join(cmd) + '\n').encode()) + + # run circle-partitioner + _utils._run(cmd, err_prefix='circle-partitioner', logfile=f) + + +def main(): + # parse arguments + parser = _get_parser() + args = _parse_arg(parser) + + # parse configuration file + _utils._parse_cfg(args, 'one-partition') + + if _utils._is_valid_attr(args, 'config'): + config_path = getattr(args, 'config') + _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args) + + # verify arguments + _verify_arg(parser, args) + + # do partition + _partition(args) + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv index 0f75166..b435671 100644 --- a/compiler/one-cmds/one-prepare-venv +++ b/compiler/one-cmds/one-prepare-venv @@ -41,6 
+41,7 @@ VER_ONNX_TF=1.10.0 # Install tensorflow PIP_TRUSTED_HOST="--trusted-host pypi.org " +PIP_TRUSTED_HOST+="--trusted-host pypi.python.org " PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org " PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org " @@ -62,7 +63,8 @@ else ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW} fi ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability +# TODO remove version fix, https://github.com/Samsung/ONE/issues/9240 +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0 # Install PyTorch and ONNX related # NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL. @@ -72,6 +74,8 @@ TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html" if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}" fi +# TODO remove torch message +echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'" ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL} ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} @@ -84,3 +88,7 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then else ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF} fi + +# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051 +# TODO remove this when issue is resolved +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1 diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile index ed6d8bd..b19c215 100644 --- a/compiler/one-cmds/one-profile +++ b/compiler/one-cmds/one-profile @@ -25,9 +25,7 @@ import glob import itertools import ntpath import os -import subprocess import sys -import tempfile import utils as _utils diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize index f2eff24..9282007 100644 --- a/compiler/one-cmds/one-quantize +++ 
b/compiler/one-cmds/one-quantize @@ -21,11 +21,12 @@ import argparse import os -import subprocess import sys import tempfile +import json import utils as _utils +from utils import Command # TODO Find better way to suppress trackback on error sys.tracebacklimit = 0 @@ -67,6 +68,12 @@ def _get_parser(): action='store_true', help='generate profiling data') + # save intermediate file(s) + parser.add_argument( + '--save_intermediate', + action='store_true', + help='Save intermediate files to output folder') + ## arguments for quantization quantization_group = parser.add_argument_group('arguments for quantization') @@ -93,13 +100,13 @@ def _get_parser(): '--input_type', type=str, help= - 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.' + 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.' ) quantization_group.add_argument( '--output_type', type=str, help= - 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.' + 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.' ) quantization_group.add_argument( '--min_percentile', @@ -126,10 +133,50 @@ def _get_parser(): "Force MaxPool Op to have the same input/output quantparams. 
NOTE: This option can degrade accuracy of some models.)" ) quantization_group.add_argument( - '--quant_config', - type=str, + '--quant_config', type=str, help="Path to the quantization configuration file.") + quantization_group.add_argument( + '--evaluate_result', + action='store_true', + help= + "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results." + ) + quantization_group.add_argument( + '--test_data', type=str, help="Path to the test data used for evaluation.") + quantization_group.add_argument( + '--print_mae', + action='store_true', + help= + "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mape', + action='store_true', + help= + "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mpeir', + action='store_true', + help= + "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_top1_match', + action='store_true', + help= + "Print Top-1 match ratio of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_top5_match', + action='store_true', + help= + "Print Top-5 match ratio of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mse', + action='store_true', help= - "Path to the quantization configuration file." + "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model." 
) # arguments for force_quantparam option @@ -162,6 +209,14 @@ def _get_parser(): copy_quantparam_group.add_argument( '--dst_tensor_name', type=str, action='append', help='tensor name (string)') + # arguments for fake_quant option + fake_quant_group = parser.add_argument_group('arguments for fake_quantize option') + + fake_quant_group.add_argument( + '--fake_quantize', + action='store_true', + help='convert quantized model to fake-quantized fp32 model.') + return parser @@ -171,8 +226,29 @@ def _set_default_values(args): setattr(args, 'input_model_dtype', 'float32') if not _utils._is_valid_attr(args, 'quantized_dtype'): setattr(args, 'quantized_dtype', 'uint8') + if _utils._is_valid_attr(args, 'quant_config'): + # Get quantized_dtype from qconfig file + try: + with open(getattr(args, 'quant_config')) as f: + qconf = json.load(f) + if 'default_quantization_dtype' in qconf: + setattr(args, 'quantized_dtype', + qconf['default_quantization_dtype']) + except json.decoder.JSONDecodeError: + print('Failed to decode ' + getattr(args, 'quant_config') + + '. Please check it is a json file.') if not _utils._is_valid_attr(args, 'granularity'): setattr(args, 'granularity', 'layer') + if _utils._is_valid_attr(args, 'quant_config'): + # Get granularity from qconfig file + try: + with open(getattr(args, 'quant_config')) as f: + qconf = json.load(f) + if 'default_granularity' in qconf: + setattr(args, 'granularity', qconf['default_granularity']) + except json.decoder.JSONDecodeError: + print('Failed to decode ' + getattr(args, 'quant_config') + + '. 
Please check it is a json file.') if not _utils._is_valid_attr(args, 'mode'): setattr(args, 'mode', 'percentile') if not _utils._is_valid_attr(args, 'min_percentile'): @@ -238,11 +314,18 @@ def _quantize(args): _copy_qparam(args) return + if _utils._is_valid_attr(args, 'fake_quantize'): + # fake-quantize model + _fake_quantize(args) + return + # get file path to log dir_path = os.path.dirname(os.path.realpath(__file__)) logfile_path = os.path.realpath(args.output_path) + '.log' with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir: + if _utils._is_valid_attr(args, 'save_intermediate'): + tmpdir = os.path.dirname(logfile_path) # get driver path circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') record_minmax_path = os.path.join(dir_path, 'record-minmax') @@ -263,13 +346,19 @@ def _quantize(args): circle_quantizer_cmd.append(getattr(args, 'quantized_dtype')) if _utils._is_valid_attr(args, 'granularity'): circle_quantizer_cmd.append(getattr(args, 'granularity')) + if _utils._is_valid_attr(args, 'quant_config'): + # NOTE --config conflicts with --config option in onecc, so + # we use quant_config for one-quantize + circle_quantizer_cmd.append('--config') + circle_quantizer_cmd.append(getattr(args, 'quant_config')) # input and output path if _utils._is_valid_attr(args, 'input_path'): circle_quantizer_cmd.append(getattr(args, 'input_path')) - tmp_output_path_1 = os.path.join( + tmp_weights_fake_quant_path = os.path.join( tmpdir, - os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle' - circle_quantizer_cmd.append(tmp_output_path_1) + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.weights_fake_quant.circle' + circle_quantizer_cmd.append(tmp_weights_fake_quant_path) # profiling if _utils._is_valid_attr(args, 'generate_profile_data'): circle_quantizer_cmd.append('--generate_profile_data') @@ -279,45 +368,23 @@ def _quantize(args): # run circle-quantizer _utils._run(circle_quantizer_cmd, 
err_prefix="circle_quantizer", logfile=f) - ## make a command to record min-max value of each tensor while running the representative dataset - circle_record_minmax_cmd = [record_minmax_path] - # verbose - if _utils._is_valid_attr(args, 'verbose'): - circle_record_minmax_cmd.append('--verbose') - # input and output path - circle_record_minmax_cmd.append('--input_model') - circle_record_minmax_cmd.append(tmp_output_path_1) - tmp_output_path_2 = os.path.join( + tmp_minmax_recorded_path = os.path.join( tmpdir, - os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle' - circle_record_minmax_cmd.append('--output_model') - circle_record_minmax_cmd.append(tmp_output_path_2) - # input data - if _utils._is_valid_attr(args, 'input_data'): - circle_record_minmax_cmd.append('--input_data') - circle_record_minmax_cmd.append(getattr(args, 'input_data')) - if _utils._is_valid_attr(args, 'input_data_format'): - circle_record_minmax_cmd.append('--input_data_format') - circle_record_minmax_cmd.append(getattr(args, 'input_data_format')) - # min and max percentile - if _utils._is_valid_attr(args, 'min_percentile'): - circle_record_minmax_cmd.append('--min_percentile') - circle_record_minmax_cmd.append(getattr(args, 'min_percentile')) - if _utils._is_valid_attr(args, 'max_percentile'): - circle_record_minmax_cmd.append('--max_percentile') - circle_record_minmax_cmd.append(getattr(args, 'max_percentile')) - # mode - if _utils._is_valid_attr(args, 'mode'): - circle_record_minmax_cmd.append('--mode') - circle_record_minmax_cmd.append(getattr(args, 'mode')) - # profiling - if _utils._is_valid_attr(args, 'generate_profile_data'): - circle_record_minmax_cmd.append('--generate_profile_data') - - f.write((' '.join(circle_record_minmax_cmd) + '\n').encode()) + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.minmax_recorded.circle' - # run record-minmax - _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f) + ## make a command to record min-max 
value of each tensor while running the representative dataset + record_minmax_cmd = Command(record_minmax_path, args, f) + record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \ + .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \ + .add_option_with_valid_args('--input_data', ['input_data']) \ + .add_option_with_valid_args('--input_data_format', ['input_data_format']) \ + .add_option_with_valid_args('--min_percentile', ['min_percentile']) \ + .add_option_with_valid_args('--max_percentile', ['max_percentile']) \ + .add_option_with_valid_args('--mode', ['mode']) \ + .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \ + .run() ## make a second command to quantize the model using the embedded information circle_quantizer_cmd = [circle_quantizer_path] @@ -349,7 +416,7 @@ def _quantize(args): circle_quantizer_cmd.append('--config') circle_quantizer_cmd.append(getattr(args, 'quant_config')) # input and output path - circle_quantizer_cmd.append(tmp_output_path_2) + circle_quantizer_cmd.append(tmp_minmax_recorded_path) if _utils._is_valid_attr(args, 'output_path'): circle_quantizer_cmd.append(getattr(args, 'output_path')) # profiling @@ -361,6 +428,38 @@ def _quantize(args): # run circle-quantizer _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) + # evaluate + if _utils._is_valid_attr(args, 'evaluate_result'): + circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff') + quant_model = "" + if _utils._is_valid_attr(args, 'output_path'): + quant_model = getattr(args, 'output_path') + tmp_fake_quant_model = os.path.join( + tmpdir, + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.fake_quant.circle' + + # do fake quantization + fake_quantize_cmd = Command(circle_quantizer_path, args, f) + fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + 
.add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \ + .run() + + # compare fake-quant model and fp32 model + circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f) + circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \ + .add_option_with_values('--second_model', [tmp_fake_quant_model]) \ + .add_option_with_valid_args('--first_input_data', ['test_data']) \ + .add_option_with_valid_args('--second_input_data', ['test_data']) \ + .add_option_with_valid_args('--input_data_format', ['input_data_format']) \ + .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \ + .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \ + .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \ + .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \ + .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \ + .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \ + .run() + def _write_qparam(args): # get file path to log @@ -433,6 +532,24 @@ def _copy_qparam(args): _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) +def _fake_quantize(args): + # get file path to log + dir_path = os.path.dirname(os.path.realpath(__file__)) + logfile_path = os.path.realpath(args.output_path) + '.log' + + with open(logfile_path, 'wb') as f: + # get driver path + circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') + q_model = getattr(args, 'input_path') + fq_model = getattr(args, 'output_path') + + # do fake quantization + fake_quantize_cmd = Command(circle_quantizer_path, args, f) + fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + .add_option_with_values('--fake_quantize', [q_model, fq_model]) \ + .run() + + def main(): # parse arguments parser = _get_parser() diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc index 25682ff..a5ba636 100644 --- a/compiler/one-cmds/onecc +++ b/compiler/one-cmds/onecc 
@@ -25,6 +25,8 @@ import os import subprocess import sys +from onelib.CfgRunner import CfgRunner +from onelib.WorkflowRunner import WorkflowRunner import utils as _utils # TODO Find better way to suppress trackback on error @@ -42,6 +44,7 @@ subtool_list = { 'backend': { 'codegen': 'Code generation tool', 'profile': 'Profile backend model file', + 'infer': 'Infer backend model file' }, } @@ -64,12 +67,25 @@ def _check_subtool_exists(): def _get_parser(): - onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [COMMAND ]' + onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [-W WORKFLOW] [-O OPTIMIZATION] [COMMAND ]' onecc_desc = 'Run ONE driver via several commands or configuration file' parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage) _utils._add_default_arg(parser) + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = ['-' + s for s in opt_name_list] + if not opt_name_list: + opt_help_message = '(No available optimization options)' + else: + opt_help_message = '(Available optimization options: ' + ', '.join( + opt_name_list) + ')' + opt_help_message = 'optimization name to use ' + opt_help_message + parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message) + + parser.add_argument( + '-W', '--workflow', type=str, metavar='WORKFLOW', help='run with workflow file') + # just for help message compile_group = parser.add_argument_group('compile to circle model') for tool, desc in subtool_list['compile'].items(): @@ -98,45 +114,17 @@ def _parse_arg(parser): def _verify_arg(parser, args): """verify given arguments""" # check if required arguments is given - if not _utils._is_valid_attr(args, 'config'): - parser.error('-C/--config argument is required') - - -def _get_driver_name(driver_name): - return { - 'one-optimize': 'one-optimize', - 'one-quantize': 'one-quantize', - 'one-pack': 'one-pack', - 'one-codegen': 'one-codegen', - 'one-profile': 'one-profile' - }[driver_name] - - -def _parse_cfg(args): - config = 
configparser.ConfigParser() - config.optionxform = str - parsed = config.read(os.path.expanduser(getattr(args, 'config'))) - if not parsed: - raise FileNotFoundError('Not found given configuration file') - return config - - -def _is_available_driver(config, driver_name): - return config.has_option('onecc', driver_name) and config.getboolean( - 'onecc', driver_name) - - -def _verify_cfg(import_driver_list, config): - if not config.has_section('onecc'): - raise ImportError('[onecc] section is required in configuration file') - - import_driver_cnt = 0 - for d in import_driver_list: - if _is_available_driver(config, d): - import_driver_cnt += 1 - - if import_driver_cnt > 1: - raise AssertionError('Only one import-* driver can be executed') + if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr( + args, 'workflow'): + parser.error('-C/--config or -W/--workflow argument is required') + # check if given optimization option exists + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list] + if _utils._is_valid_attr(args, 'O'): + if ' ' in getattr(args, 'O'): + parser.error('Not allowed to have space in the optimization name') + if not getattr(args, 'O') in opt_name_list: + parser.error('Invalid optimization option') def main(): @@ -158,35 +146,16 @@ def main(): # verify arguments _verify_arg(parser, args) - # parse configuration file - config = _parse_cfg(args) - - # verify configuration file bin_dir = os.path.dirname(os.path.realpath(__file__)) - import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) - transform_drivers = [ - 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' - ] - _verify_cfg(import_drivers_dict, config) - - # get sections to run - section_to_run = [] - for d in list(import_drivers_dict) + transform_drivers: - if _is_available_driver(config, d): - section_to_run.append(d) - - # run - dir_path = 
os.path.dirname(os.path.realpath(__file__)) - for section in section_to_run: - if section in import_drivers_dict: - # we already has driver name in dict - driver_name = import_drivers_dict[section] - else: - driver_name = _get_driver_name(section) - options = ['--config', getattr(args, 'config'), '--section', section] - if _utils._is_valid_attr(args, 'verbose'): - options.append('--verbose') - _call_driver(driver_name, options) + if _utils._is_valid_attr(args, 'config'): + runner = CfgRunner(args.config) + runner.detect_import_drivers(bin_dir) + if _utils._is_valid_attr(args, 'O'): + runner.add_opt(getattr(args, 'O')) + runner.run(bin_dir) + elif _utils._is_valid_attr(args, 'workflow'): + runner = WorkflowRunner(args.workflow) + runner.run(bin_dir) if __name__ == '__main__': diff --git a/compiler/one-cmds/onecc.template.cfg b/compiler/one-cmds/onecc.template.cfg index a23d1ce..6f6a4e2 100644 --- a/compiler/one-cmds/onecc.template.cfg +++ b/compiler/one-cmds/onecc.template.cfg @@ -1,28 +1,144 @@ +; To activate a step (or task), +; set True for the step in [onecc] section and fill options in the corresponding section [onecc] -one-import-tf=True +; neural network model to circle +one-import-tf=False one-import-tflite=False one-import-bcq=False one-import-onnx=False -one-optimize=True +; circle to circle with optimization +one-optimize=False +; circle to circle with quantization one-quantize=False -one-pack=True +; partition circle +one-partition=False +; package circle and metadata into nnpackage +one-pack=False +; generate code for backend one-codegen=False +; profile one-profile=False +; infer +one-infer=False [one-import-tf] -input_path=/path/to/inception_v3.pb -output_path=inception_v3.circle -input_arrays=input -input_shapes=1,299,299,3 -output_arrays=InceptionV3/Predictions/Reshape_1 -converter_version=v1 +# mandatory +; pb file +input_path= +; circle file +output_path= +# optional +; v1 or v2 +converter_version=v2 +; graph_def(default), saved_model or 
keras_model model_format=graph_def +# optional but mandatory for model_format=graph_def +; input tensor names of the input arrays, comma-separated +input_arrays= +; output tensor names of the input arrays, comma-separated +output_arrays= +; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3) +input_shapes= + +[one-import-tflite] +# mandatory +; tflite file +input_path= +; circle file +output_path= + +[one-import-bcq] +# mandatory +; bcq file +input_path= +; circle file +output_path= +# optional +; v1 or v2 +converter_version=v2 +; graph_def(default), saved_model or keras_model +model_format=graph_def +# optional but mandatory for model_format=graph_def +; input tensor names of the input arrays, comma-separated +input_arrays= +; output tensor names of the input arrays, comma-separated +output_arrays= +; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3) +input_shapes= + +[one-import-onnx] +# mandatory +; onnx file +input_path= +; circle file +output_path= +# optional +; True or False +unroll_rnn= +; True or False +unroll_lstm= [one-optimize] -input_path=inception_v3.circle -output_path=inception_v3.opt.circle -generate_profile_data=False +# mandatory +; circle file +input_path= +; circle file +output_path= +# //TODO: Add available options + +[one-quantize] +# mandatory +; circle file +input_path= +; circle file +output_path= +# optional arguments for quantization +; input data file (if not given, random data will be used for calibration) +input_data= +; h5/hdf5(default), list/filelist, or dir/directory +input_data_format= +; dtype of quantized model (uint8(default), int16) +quantized_dtype= +; granularity of quantization (layer(default), channel) +granularity= +; dtype of model's input (uint8, int16, float32). Same with quantized_dtype by default. +input_type= +; dtype of model's output (uint8, int16, float32). Same with quantized_dtype by default. 
+output_type= + +[one-partition] +# mandatory +; partition file which provides backend to assign +part_file= +; circle file +input_file= +# //TODO: Add available options [one-pack] -input_path=inception_v3.opt.circle -output_path=inception_v3_pack +# mandatory +; input path +input_path= +; output path +output_path= +# //TODO: Add available options + +[one-codegen] +# mandatory +; backend name +backend= +; commands for each backend +command= + +[one-profile] +# mandatory +; backend name +backend= +# //TODO: Add available options + +[one-infer] +# mandatory (mutually exclusive) +; backend name +backend= +; driver name +driver= +# //TODO: Add available options diff --git a/compiler/one-cmds/onelib/CfgRunner.py b/compiler/one-cmds/onelib/CfgRunner.py new file mode 100644 index 0000000..c66e5b4 --- /dev/null +++ b/compiler/one-cmds/onelib/CfgRunner.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import configparser
import os
import warnings


def _simple_warning(message, category, filename, lineno, file=None, line=None):
    """Format a warning as ``<Category>: <message>``, omitting file and line."""
    return f'{category.__name__}: {message}\n'


class CfgRunner:
    """Run the one-cmds drivers that a configuration file switches on.

    The configuration file must contain an ``[onecc]`` section (or the legacy
    ``[one-build]`` section) whose boolean options name the drivers to run.
    Import drivers run first, followed by the drivers in ``driver_sequence``.
    """

    # Non-import drivers, in the order they are considered by run().
    driver_sequence = [
        'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
        'one-partition', 'one-infer'
    ]

    def __init__(self, path):
        """Parse and verify the configuration file at ``path``.

        Raises:
            FileNotFoundError: the file could not be read.
            ImportError: neither ``[onecc]`` nor ``[one-build]`` is present.
        """
        self.path = path
        # Both stay None until add_opt() succeeds; run() checks optparser.
        self.optparser = None
        self.opt = None
        self.cfgparser = configparser.ConfigParser()
        # make option names case sensitive
        self.cfgparser.optionxform = str
        parsed = self.cfgparser.read(os.path.expanduser(path))
        if not parsed:
            raise FileNotFoundError('Not found given configuration file')

        self._verify_cfg(self.cfgparser)
        # default import drivers
        self.import_drivers = [
            'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
        ]

    def _verify_cfg(self, cfgparser):
        """Require an ``[onecc]`` section; accept ``[one-build]`` with a warning."""
        if cfgparser.has_section('onecc'):
            return
        if not cfgparser.has_section('one-build'):
            raise ImportError('[onecc] section is required in configuration file')
        # NOTE this replaces the process-wide warning formatter.
        warnings.formatwarning = _simple_warning
        warnings.warn(
            "[one-build] section will be deprecated. Please use [onecc] section.")

    def _is_available(self, driver):
        """Return True when ``driver`` is enabled in ``[onecc]`` or ``[one-build]``."""
        # if there's no `onecc` section, it will find `one-build` section because of backward compatibility
        for section in ('onecc', 'one-build'):
            if self.cfgparser.has_option(section, driver) and self.cfgparser.getboolean(
                    section, driver):
                return True
        return False

    def add_opt(self, opt):
        """Attach the optimization configuration named ``'O' + opt``.

        The runner is only updated after the optimization file has been read
        and validated, so a failure leaves it exactly as it was.

        Raises:
            KeyError: no optimization named ``'O' + opt`` is known.
            FileNotFoundError: the optimization file could not be read.
            AssertionError: the file has sections other than ``one-optimize``.
        """
        # Imported here so that loading this module does not require `utils`.
        import utils as oneutils

        optparser = configparser.ConfigParser()
        # make option names case sensitive
        optparser.optionxform = str
        opt_book = dict(
            zip(oneutils._get_optimization_list(get_name=True),
                oneutils._get_optimization_list()))
        parsed = optparser.read(opt_book['O' + opt])
        if not parsed:
            raise FileNotFoundError('Not found given optimization configuration file')
        if optparser.sections() != ['one-optimize']:
            raise AssertionError(
                'Optimization configuration file only allowed to have a \'one-optimize\' section'
            )
        self.optparser = optparser
        self.opt = opt

    def detect_import_drivers(self, dir):
        """Replace the default import drivers with those detected in ``dir``."""
        import utils as oneutils

        self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys())

    def run(self, working_dir, verbose=False):
        """Invoke every enabled driver found under ``working_dir``, in order."""
        import utils as oneutils

        # Resolve the whole list first so a malformed boolean in the
        # configuration is reported before any driver has been started.
        section_to_run = [
            d for d in self.import_drivers + self.driver_sequence
            if self._is_available(d)
        ]

        for section in section_to_run:
            options = ['--config', self.path, '--section', section]
            if section == 'one-optimize' and self.optparser is not None:
                options += ['-O', self.opt]
            if verbose:
                options.append('--verbose')
            driver_path = os.path.join(working_dir, section)
            oneutils._run([driver_path] + options)


# ---- patch scaffolding that shared the original physical line (next file) ----
# diff --git a/compiler/one-cmds/onelib/OptionBuilder.py b/compiler/one-cmds/onelib/OptionBuilder.py
# new file mode 100644
# index 0000000..6a75783
# --- /dev/null
# +++ b/compiler/one-cmds/onelib/OptionBuilder.py
# @@ -0,0 +1,95 @@
#!/usr/bin/env python
#
# Copyright (c) 2022 Samsung Electronics
# Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class OptionBuilder:
    """Translate a ``{name: value}`` command mapping into driver CLI options.

    The translation rules depend on the one-cmd driver the options are for,
    given as ``one_cmd_type`` (for example ``'one-quantize'``).
    """

    def __init__(self, one_cmd_type):
        self.type = one_cmd_type

    @staticmethod
    def _build_with_flags(commands, flags):
        """Build options where the names in ``flags`` take no value.

        A flag is emitted as a bare ``--name`` only when its value is the
        string ``"True"``; any other value drops it, because the drivers
        declare these options as switches and reject ``--name False``.
        Every other entry is emitted as ``--name value``.
        """
        options = []
        for key, value in commands.items():
            if key in flags:
                if value == "True":
                    options.append('--' + key)
                continue
            options.extend(['--' + key, value])
        return options

    def _build_default(self, commands):
        """Emit every entry as ``--name value``."""
        options = []
        for key, value in commands.items():
            options.extend(['--' + key, value])
        return options

    def _build_with_unknown_command(self, commands):
        """Emit ``--name value`` pairs, then ``--`` and the split ``command``.

        Raises:
            KeyError: ``commands`` has no ``'command'`` entry.
        """
        COMMAND_K = 'command'
        options = []
        for key, value in commands.items():
            if key != COMMAND_K:
                options.extend(['--' + key, value])
        options.append('--')
        options.extend(commands[COMMAND_K].split())
        return options

    def _build_import(self, commands):
        """Build options for the one-import-* drivers."""
        return self._build_with_flags(commands, ('save_intermediate', ))

    def _build_optimize(self, commands):
        """Build options for one-optimize.

        Path-like entries keep their value; everything else is a switch that
        is emitted only when set to ``"True"`` and known either as
        ``generate_profile_data`` or as a name in ``CONSTANT.OPTIMIZATION_OPTS``.
        """
        # Imported here so that the other builders work without `onelib`.
        from onelib.constant import CONSTANT

        arg_0 = ('generate_profile_data', )
        arg_1 = ('input_path', 'output_path', 'change_outputs')
        options = []
        for key, value in commands.items():
            if key in arg_1:
                options.extend(['--' + key, value])
            elif value == "True" and (key in arg_0 or any(
                    key == opt[0] for opt in CONSTANT.OPTIMIZATION_OPTS)):
                options.append('--' + key)
        return options

    def _build_quantize(self, commands):
        """Build options for one-quantize."""
        arg_0 = (
            'generate_profile_data', 'save_intermediate', 'TF-style_maxpool',
            'evaluate_result', 'print_mae', 'print_mape', 'print_mpeir',
            'print_top1_match', 'print_top5_match', 'print_mse', 'force_quantparam',
            'copy_quantparam', 'fake_quantize')
        return self._build_with_flags(commands, arg_0)

    def build(self, commands):
        """Return the option list for ``commands`` using this builder's type.

        Raises:
            KeyError: the builder's type is not a known one-cmd driver.
        """
        cmd_book = dict.fromkeys(
            ['one-import-bcq', 'one-import-tflite', 'one-pack', 'one-partition'],
            self._build_default)
        cmd_book.update({
            'one-codegen': self._build_with_unknown_command,
            'one-import-onnx': self._build_import,
            'one-import-pytorch': self._build_import,
            'one-import-tf': self._build_import,
            'one-infer': self._build_with_unknown_command,
            'one-optimize': self._build_optimize,
            'one-profile': self._build_with_unknown_command,
            'one-quantize': self._build_quantize,
        })
        return cmd_book[self.type](commands)


# ---- patch scaffolding that shared the original physical line (next file) ----
# diff --git a/compiler/one-cmds/onelib/TopologicalSortHelper.py b/compiler/one-cmds/onelib/TopologicalSortHelper.py
# new file mode 100644
# index 0000000..d05adea
# --- /dev/null
# +++ b/compiler/one-cmds/onelib/TopologicalSortHelper.py
# @@ -0,0 +1,45 @@
#!/usr/bin/env python
#
# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict


class TopologicalSortHelper:
    """Depth-first topological ordering of a directed graph.

    ``vertices`` lists the vertices to order; ``add_edge(u, v)`` records that
    ``u`` must come before ``v``. Cycles are not detected here: ``sort()``
    still returns every vertex once, in an order that cannot honour all edges.
    """

    def __init__(self, vertices):
        self.graph = defaultdict(list)
        self.vertices = vertices

    def add_edge(self, u, v):
        """Record a directed edge ``u -> v``."""
        self.graph[u].append(v)

    def _visit(self, root, visited, finished):
        """Append the unvisited vertices reachable from ``root`` in finish order.

        Uses an explicit stack instead of recursion so that long dependency
        chains cannot exceed the interpreter's recursion limit.

        Raises:
            KeyError: an edge points at a vertex missing from ``visited``.
        """
        visited[root] = True
        pending = [(root, iter(self.graph[root]))]
        while pending:
            vertex, successors = pending[-1]
            for successor in successors:
                if not visited[successor]:
                    visited[successor] = True
                    pending.append((successor, iter(self.graph[successor])))
                    break
            else:
                # Every successor is done, so this vertex is finished.
                pending.pop()
                finished.append(vertex)

    def sort_util(self, v, visited, stack):
        """Visit ``v`` and prepend what it reaches to ``stack``.

        ``visited`` maps each vertex to a bool and is updated in place. The
        newly finished vertices are placed at the front of ``stack`` with
        ``v`` first.
        """
        finished = []
        self._visit(v, visited, finished)
        stack[:0] = reversed(finished)

    def sort(self):
        """Return the vertices in topological order as a new list."""
        visited = dict.fromkeys(self.vertices, False)
        finished = []

        for v in self.vertices:
            if not visited[v]:
                self._visit(v, visited, finished)

        # Reversing the finish order once replaces a front insertion per vertex.
        finished.reverse()
        return finished


# ---- patch scaffolding that shared the original physical line (next file) ----
# diff --git a/compiler/one-cmds/onelib/WorkflowRunner.py b/compiler/one-cmds/onelib/WorkflowRunner.py
# new file mode 100644
# index 0000000..0482dd9
# --- /dev/null
# +++ b/compiler/one-cmds/onelib/WorkflowRunner.py
# @@ -0,0 +1,131 @@
#!/usr/bin/env python
#
# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+ +import json +import os + +from onelib.OptionBuilder import OptionBuilder +from onelib.TopologicalSortHelper import TopologicalSortHelper +from onelib.CfgRunner import CfgRunner +import utils as oneutils + + +class WorkflowRunner: + WORKFLOWS_K = 'workflows' + DEPENDENCIES_K = 'run-after' + CFG_REFERENCE_K = 'cfg-reference' + WORKFLOW_STEPS_K = 'steps' + ONE_CMD_TOOL_K = 'one-cmd' + COMMANDS_K = 'commands' + + def __init__(self, path): + try: + with open(path) as f: + self.json_contents = json.load(f) + except FileNotFoundError: + raise FileNotFoundError("Not found given workflow file") + except json.decoder.JSONDecodeError: + raise ImportError("Invalid workflow file") + + self._verify_workflow(self.json_contents) + + workflows = self.json_contents[self.WORKFLOWS_K] + self.adj = dict.fromkeys(workflows, []) + # decide the order according to the dependencies of each workflow. + helper = TopologicalSortHelper(workflows) + for workflow_k in workflows: + workflow = self.json_contents[workflow_k] + if self.DEPENDENCIES_K in workflow: + for previous_workflow in workflow[self.DEPENDENCIES_K]: + helper.add_edge(previous_workflow, workflow_k) + self.adj[previous_workflow].append(workflow_k) + self.workflow_sequence = helper.sort() + + self._check_cycle() + + def _check_cycle(self): + pos = dict() + index = 0 + workflow_num = len(self.workflow_sequence) + # number the order + for seq_idx in range(workflow_num): + pos[self.workflow_sequence[seq_idx]] = index + index += 1 + + for seq_idx in range(workflow_num): + first_wf = self.workflow_sequence[seq_idx] + for adj_wf in self.adj[first_wf]: + first_pos = 0 if first_wf not in pos else pos[first_wf] + second_pos = 0 if adj_wf not in pos else pos[adj_wf] + if (first_pos > second_pos): + raise RuntimeError("Workflows should not have a cycle") + + def _verify_workflow(self, json_contents): + # workflow file should have WORKFLOWS_K + if not self.WORKFLOWS_K in json_contents: + raise ValueError("Not found \"" + self.WORKFLOWS_K + + 
"\" key in workflow file") + + workflows = json_contents[self.WORKFLOWS_K] + # workflow file should have keys listed in WORKFLOWS_K + for workflow_k in workflows: + if not workflow_k in json_contents: + raise ValueError("Not found " + workflow_k + " key listed in \"" + + self.WORKFLOWS_K + "\"") + + # each workflow should have either WORKFLOW_STEPS_K or CFG_REFERENCE_K + for workflow_k in workflows: + if not self.WORKFLOW_STEPS_K in json_contents[workflow_k] and not self.CFG_REFERENCE_K in json_contents[workflow_k]: + raise ValueError("Each workflow should have either \"" + + self.WORKFLOW_STEPS_K + "\" or \"" + + self.CFG_REFERENCE_K + "\"") + for workflow_k in workflows: + if self.WORKFLOW_STEPS_K in json_contents[workflow_k] and self.CFG_REFERENCE_K in json_contents[workflow_k]: + raise ValueError("\"" + self.WORKFLOW_STEPS_K + "\" and \"" + + self.CFG_REFERENCE_K + "\" are exclusive key") + + # each step should have ONE_CMD_TOOL_K and COMMANDS_K + for workflow_k in workflows: + workflow = json_contents[workflow_k] + if self.WORKFLOW_STEPS_K in workflow: + step_keys = workflow[self.WORKFLOW_STEPS_K] + for step_k in step_keys: + step = workflow[step_k] + if not self.ONE_CMD_TOOL_K in step or not self.COMMANDS_K in step: + raise ValueError("Each step should have \"" + + self.ONE_CMD_TOOL_K + "\"" + " and \"" + + self.COMMANDS_K + "\"") + + def run(self, working_dir, verbose=False): + # run workflows in sequence + for workflow_k in self.workflow_sequence: + workflow = self.json_contents[workflow_k] + if self.WORKFLOW_STEPS_K in workflow: + steps = workflow[self.WORKFLOW_STEPS_K] + for step_k in steps: + step = workflow[step_k] + commands = step[self.COMMANDS_K] + driver_name = step[self.ONE_CMD_TOOL_K] + option_builder = OptionBuilder(driver_name) + options = option_builder.build(commands) + # get the absolute path of the caller + driver_path = os.path.join(working_dir, driver_name) + cmd = [driver_path] + options + oneutils._run(cmd) + elif self.CFG_REFERENCE_K in 
workflow: + cfg_path = workflow[self.CFG_REFERENCE_K]['path'] + runner = CfgRunner(cfg_path) + runner.run(working_dir, verbose) diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py index 7ddd738..7dd79b6 100644 --- a/compiler/one-cmds/onelib/constant.py +++ b/compiler/one-cmds/onelib/constant.py @@ -14,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. + class CONSTANT: __slots__ = () # This prevents access via __dict__. OPTIMIZATION_OPTS = ( # (OPTION_NAME, HELP_MESSAGE) - ('O1', 'enable O1 optimization pass'), ('convert_nchw_to_nhwc', 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.' ), @@ -29,6 +29,7 @@ class CONSTANT: 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'), ('fold_add_v2', 'fold AddV2 op with constant inputs'), ('fold_cast', 'fold Cast op with constant input'), + ('fold_densify', 'fold Densify op with sparse constant input'), ('fold_dequantize', 'fold Dequantize op'), ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'), ('fold_gather', 'fold Gather op'), @@ -62,12 +63,16 @@ class CONSTANT: ('remove_unnecessary_slice', 'remove unnecessary slice ops'), ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'), ('remove_unnecessary_split', 'remove unnecessary split ops'), + ('replace_non_const_fc_with_batch_matmul', + 'replace FullyConnected op with non-const weights to BatchMatMul op'), + ('replace_sub_with_add', 'replace Sub op with Add op'), ('resolve_customop_add', 'convert Custom(Add) op to Add op'), ('resolve_customop_batchmatmul', 'convert Custom(BatchMatmul) op to BatchMatmul op'), ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'), ('resolve_customop_max_pool_with_argmax', 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'), + ('resolve_customop_splitv', 'convert Custom(SplitV) op to SplitV op'), 
('shuffle_weight_to_16x1float32', 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.' ' Note that it only converts weights whose row is a multiple of 16'), diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py index d8380f2..0015e83 100644 --- a/compiler/one-cmds/onelib/make_cmd.py +++ b/compiler/one-cmds/onelib/make_cmd.py @@ -19,6 +19,7 @@ import sys import onelib.constant as _constant + def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) @@ -64,6 +65,10 @@ def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path): cmd.append('--output_arrays') cmd.append(getattr(args, 'output_arrays')) + # experimental options + if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'): + cmd.append('--experimental_disable_batchmatmul_unfold') + return cmd diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py index 26c2b75..0141514 100755 --- a/compiler/one-cmds/onnx_legalizer.py +++ b/compiler/one-cmds/onnx_legalizer.py @@ -341,7 +341,8 @@ def _dtype_to_np(dtype): raise NotImplementedError('unsupported data type') -def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name): +def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, + activation_name): """Generate subgraph of one direction of unrolled RNN layer Args: @@ -395,7 +396,7 @@ def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activa def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation, - clip, direction, hidden_size, layout): + clip, direction, hidden_size, layout): """Generate Simple (forward or reverse) unrolled RNN Args: @@ -432,7 +433,7 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a else: initial_h = None state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip, - activation) + activation) y_direction_dim = 
layout + 1 y_h_direction_dim = layout state_layout_tensors = [] @@ -447,12 +448,11 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a transformer.make_node( 'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim]) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations, - clip, hidden_size, layout): + clip, hidden_size, layout): """Generate Bidirectional unrolled RNN Args: @@ -503,10 +503,10 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim]) state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0], - initial_h[0], clip, activations[0]) + initial_h[0], clip, activations[0]) x.reverse() state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1], - initial_h[1], clip, activations[1]) + initial_h[1], clip, activations[1]) state_b_tensors.reverse() y_direction_dim = layout + 1 @@ -538,8 +538,7 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac axis=y_h_direction_dim) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _legalize_RNN(transformer, tensor_infos, node): @@ -600,10 +599,10 @@ def _legalize_RNN(transformer, tensor_infos, node): if direction in ['forward', 'reverse']: _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0], - clip, direction, hidden_size, layout) + clip, direction, hidden_size, layout) elif direction == 'bidirectional': - _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip, - hidden_size, layout) + 
_transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, + clip, hidden_size, layout) else: raise RuntimeError('Unknown RNN type') @@ -611,7 +610,7 @@ def _legalize_RNN(transformer, tensor_infos, node): def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip, - act, dtype, hidden_size, batch_size): + act, dtype, hidden_size, batch_size): """Generate subgraph for one direction of unrolled LSTM layer Args: @@ -754,7 +753,7 @@ def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos, - activations, clip, direction, hidden_size, layout): + activations, clip, direction, hidden_size, layout): """Generate Simple (forward or reverse) unrolled LSTM Args: @@ -818,17 +817,15 @@ def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos, transformer.make_node( 'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim]) Y_c = outputs[2] - transformer.make_node( - 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim]) + transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim]) if direction == 'reverse': state_layout_tensors.reverse() Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) -def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations, - clip, hidden_size, layout): +def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, + activations, clip, hidden_size, layout): """Generate Bidirectional unrolled LSTM Args: @@ -929,12 +926,10 @@ def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, a Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim]) Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim]) Y_c = 
outputs[2] - transformer.make_node( - 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim) + transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _legalize_LSTM(transformer, tensor_infos, node): @@ -1001,10 +996,10 @@ def _legalize_LSTM(transformer, tensor_infos, node): if direction in ['forward', 'reverse']: _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations, - clip, direction, hidden_size, layout) + clip, direction, hidden_size, layout) elif direction == 'bidirectional': _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations, - clip, hidden_size, layout) + clip, hidden_size, layout) else: raise RuntimeError('Unknown LSTM type') @@ -1052,10 +1047,12 @@ def legalize(model, options): if __name__ == '__main__': if len(sys.argv) < 3: - print('usage: ./legalize_onnx.py \n' - '\n' - ' In stand-alone utility mode this tool provides basic funtionality\n' - ' If you want to have more control over applied transformations, use this legalizer as a library') + print( + 'usage: ./legalize_onnx.py \n' + '\n' + ' In stand-alone utility mode this tool provides basic funtionality\n' + ' If you want to have more control over applied transformations, use this legalizer as a library' + ) exit(1) options = LegalizeOptions() options.unroll_lstm = True diff --git a/compiler/one-cmds/requires.cmake b/compiler/one-cmds/requires.cmake index b1aabdb..c279209 100644 --- a/compiler/one-cmds/requires.cmake +++ b/compiler/one-cmds/requires.cmake @@ -1,6 +1,7 @@ require("tf2tfliteV2") require("tflite2circle") require("circle2circle") +require("circle-eval-diff") require("circle-quantizer") require("record-minmax") require("vconone") diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt index 
caea756..17f55ec 100644 --- a/compiler/one-cmds/tests/CMakeLists.txt +++ b/compiler/one-cmds/tests/CMakeLists.txt @@ -4,6 +4,8 @@ file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test") file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg") file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json") +file(GLOB PYSCRIPTS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.py") +file(GLOB WORKFLOWITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.workflow.json") # Create a script to run the tests at installation folder set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh") @@ -45,6 +47,16 @@ foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS}) install(FILES ${QCONFIGITEM} DESTINATION test) endforeach(QCONFIGITEM) +foreach(PYSCRIPT IN ITEMS ${PYSCRIPTS}) + get_filename_component(ITEM_PREFIX ${PYSCRIPT} NAME_WE) + install(FILES ${PYSCRIPT} DESTINATION test) +endforeach(PYSCRIPT) + +foreach(WORKFLOWITEM IN ITEMS ${WORKFLOWITEMS}) + get_filename_component(ITEM_PREFIX ${WORKFLOWITEM} NAME_WE) + install(FILES ${WORKFLOWITEM} DESTINATION test) +endforeach(WORKFLOWITEM) + file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n") file(APPEND "${DRIVER_SCRIPT}" diff --git a/compiler/one-cmds/tests/OONECC_024.cfg b/compiler/one-cmds/tests/OONECC_024.cfg new file mode 100644 index 0000000..a39aae0 --- /dev/null +++ b/compiler/one-cmds/tests/OONECC_024.cfg @@ -0,0 +1,2 @@ +[one-optimize] +make_batchnorm_gamma_positive=True diff --git a/compiler/one-cmds/tests/one-build_008.cfg b/compiler/one-cmds/tests/one-build_008.cfg index 615047c..8c777f6 100644 --- a/compiler/one-cmds/tests/one-build_008.cfg +++ b/compiler/one-cmds/tests/one-build_008.cfg @@ -15,7 +15,6 @@ output_path=test_onnx_model.circle [one-optimize] input_path=test_onnx_model.circle output_path=test_onnx_model.opt.circle -all=True remove_redundant_transpose=True [one-codegen] diff --git a/compiler/one-cmds/tests/one-build_009.cfg b/compiler/one-cmds/tests/one-build_009.cfg index 
66bca25..b5a35dd 100644 --- a/compiler/one-cmds/tests/one-build_009.cfg +++ b/compiler/one-cmds/tests/one-build_009.cfg @@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle [one-optimize] input_path=onnx_conv2d_conv2d.circle output_path=onnx_conv2d_conv2d.opt.circle -all=True remove_redundant_transpose=True convert_nchw_to_nhwc=True diff --git a/compiler/one-cmds/tests/one-import-onnx_002.test b/compiler/one-cmds/tests/one-import-onnx_002.test new file mode 100644 index 0000000..a6a38ee --- /dev/null +++ b/compiler/one-cmds/tests/one-import-onnx_002.test @@ -0,0 +1,71 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# test for experimental_disable_batchmatmul_unfold option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./reshape_matmul.onnx" +outputfile="./reshape_matmul.circle" + +rm -rf ${outputfile} +rm -rf ${outputfile}.log + +# run test without option that should drop FULLY_CONNECTED +one-import-onnx \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1 + +if ! 
grep -q "FULLY_CONNECTED" "${outputfile}.log"; then + trap_err_onexit +fi + +rm -rf ${outputfile} +rm -rf ${outputfile}.log + +# run test with option that should drop BATCH_MATMUL +one-import-onnx \ +--experimental_disable_batchmatmul_unfold \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1 + +if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" +exit 0 diff --git a/compiler/one-cmds/tests/one-infer-test-post-process.py b/compiler/one-cmds/tests/one-infer-test-post-process.py new file mode 100644 index 0000000..0f0e0d7 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer-test-post-process.py @@ -0,0 +1,16 @@ +# This script gets one argument and print it + +import sys +from pathlib import Path + + +def main(): + if len(sys.argv) < 2: + filepath = Path(sys.argv[0]) + sys.exit("Usage: " + filepath.name + " [Word to print]") + word = sys.argv[1] + print(word) + + +if __name__ == '__main__': + main() diff --git a/compiler/one-cmds/tests/one-infer_001.test b/compiler/one-cmds/tests/one-infer_001.test new file mode 100644 index 0000000..e7b5695 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_001.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/help-infer + exit 255 +} + +trap trap_err_onexit ERR + +# copy help-infer to bin folder +cp help-infer ../bin/help-infer + +# run test +one-infer -b help -- -h > ${filename}.log + +rm -rf ../bin/help-infer + +if grep -q "HELP MESSAGE!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_002.test b/compiler/one-cmds/tests/one-infer_002.test new file mode 100644 index 0000000..22070de --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_002.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -d dummy-infer -- ${inputfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" 
"${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_003.test b/compiler/one-cmds/tests/one-infer_003.test new file mode 100644 index 0000000..e2aa459 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_003.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy -- ${inputfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_004.test b/compiler/one-cmds/tests/one-infer_004.test new file mode 100644 index 0000000..a4cb76c --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_004.test @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# print one-infer's help message + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -h > ${filename}.log + +if grep -q "command line tool to infer model" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_005.cfg b/compiler/one-cmds/tests/one-infer_005.cfg new file mode 100644 index 0000000..aca6878 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_005.cfg @@ -0,0 +1,3 @@ +[one-infer] +backend=dummy +command=sample.tvn diff --git a/compiler/one-cmds/tests/one-infer_005.test b/compiler/one-cmds/tests/one-infer_005.test new file mode 100644 index 0000000..a44dd0e --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_005.test @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-infer with configuration input + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-infer_005.cfg" +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -C ${configfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_006.test b/compiler/one-cmds/tests/one-infer_006.test new file mode 100644 index 0000000..2612133 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_006.test @@ -0,0 +1,53 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# one-infer with post process script + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! 
-s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1 +return_code=$? + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + if [ "$return_code" -eq "0" ]; then + echo "${filename_ext} SUCCESS" + exit 0 + fi +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_neg_001.test b/compiler/one-cmds/tests/one-infer_neg_001.test new file mode 100644 index 0000000..62e7211 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_001.test @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with no input + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_002.test b/compiler/one-cmds/tests/one-infer_neg_002.test new file mode 100644 index 0000000..fa88876 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_002.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# passed driver is not found + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +driver_name="neg-infer" + +trap_err_onexit() +{ + if grep -q "FileNotFoundError: ${driver_name} not found" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_003.test b/compiler/one-cmds/tests/one-infer_neg_003.test new file mode 100644 index 0000000..a000552 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_003.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# passed backend is not found + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +backend_name="neg" + +trap_err_onexit() +{ + if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_004.test b/compiler/one-cmds/tests/one-infer_neg_004.test new file mode 100644 index 0000000..b9130d0 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_004.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# both -b and -d option drivers are given as argument + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +backend_name="neg" +driver_name="neg2" + +trap_err_onexit() +{ + if grep -q "\-d and -b options are mutually exclusive. 
Please use only one of them" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_005.test b/compiler/one-cmds/tests/one-infer_neg_005.test new file mode 100644 index 0000000..9074deb --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_005.test @@ -0,0 +1,54 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# one-infer with invalid post process script + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + return_code=$? + if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + # Case of succeed of inference driver but error after it + if [ "$return_code" -ne "0" ]; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + fi + + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! 
-s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1 + +rm -rf ../bin/dummy-infer +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-optimize_001.test b/compiler/one-cmds/tests/one-optimize_001.test index 8eb58f4..4152fa3 100644 --- a/compiler/one-cmds/tests/one-optimize_001.test +++ b/compiler/one-cmds/tests/one-optimize_001.test @@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > /dev/null 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_002.test b/compiler/one-cmds/tests/one-optimize_002.test index bd64494..58f792b 100644 --- a/compiler/one-cmds/tests/one-optimize_002.test +++ b/compiler/one-cmds/tests/one-optimize_002.test @@ -40,7 +40,7 @@ if [[ ! 
-s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --change_outputs InceptionV3/Logits/SpatialSqueeze1 \ --input_path ${inputfile} \ --output_path ${outputfile} > /dev/null 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_001.test b/compiler/one-cmds/tests/one-optimize_neg_001.test index f0b5563..c67e3d4 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_001.test +++ b/compiler/one-cmds/tests/one-optimize_neg_001.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${outputfile}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_002.test b/compiler/one-cmds/tests/one-optimize_neg_002.test index 72f306e..a1ef702 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_002.test +++ b/compiler/one-cmds/tests/one-optimize_neg_002.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${outputfile}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_003.test b/compiler/one-cmds/tests/one-optimize_neg_003.test index 3fe7d33..668a6c2 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_003.test +++ b/compiler/one-cmds/tests/one-optimize_neg_003.test @@ -44,7 +44,7 @@ if [[ ! 
-s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path "${inputfile}" > "${filename}.log" 2>&1 echo "${filename_ext} FAILED" diff --git a/compiler/one-cmds/tests/one-optimize_neg_004.test b/compiler/one-cmds/tests/one-optimize_neg_004.test index e73911b..5abd4c5 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_004.test +++ b/compiler/one-cmds/tests/one-optimize_neg_004.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${filename}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --change_outputs non_existing_node_name \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-partition_001.test b/compiler/one-cmds/tests/one-partition_001.test new file mode 100644 index 0000000..a6fba07 --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_001.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="${testmodel}.circle" +partfile="${testmodel}.part" +outputfile="${testmodel}.conn.json" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log + +# run test +one-partition \ +--input_file ${inputfile} \ +--part_file ${partfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-partition_neg_001.test b/compiler/one-cmds/tests/one-partition_neg_001.test new file mode 100644 index 0000000..d54a94f --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_neg_001.test @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with invalid .part file (wrong comply value) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + if grep -q "ERROR" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="${testmodel}.circle" +partfile="${testmodel}.neg.part" +outputfile="${testmodel}.conn.json" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log +rm -rf ${filename}.log + +# run test +one-partition \ +--input_file ${inputfile} \ +--part_file ${partfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-partition_neg_002.test b/compiler/one-cmds/tests/one-partition_neg_002.test new file mode 100644 index 0000000..23fe84c --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_neg_002.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with invalid .cfg file (no one-partition section) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + if grep -q "'one-partition' section" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +cfgfile="${testmodel}.neg.cfg" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log +rm -rf ${filename}.log + +# run test +one-partition -C ${cfgfile}> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-quantize_010.test b/compiler/one-cmds/tests/one-quantize_010.test new file mode 100644 index 0000000..1095ba0 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_010.test @@ -0,0 +1,65 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_010.q.circle" +datafile="./inception_v3_test_data.h5" + +rm -rf ${outputfile} + +# to create inception_v3.circle +if [[ ! -s ${inputfile} ]]; then + /bin/bash one-import_001.test > /dev/null 2>&1 + return_code=$? + if [[ ${return_code} != 0 ]]; then + trap_err_onexit + fi +fi + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datafile} \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datafile} \ +--print_mpeir > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_011.test b/compiler/one-cmds/tests/one-quantize_011.test new file mode 100644 index 0000000..34d7f57 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_011.test @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_011.q.circle" +datafile="./inception_v3_test_data.h5" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datafile} \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datafile} \ +--print_top5_match > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_012.qconf.json b/compiler/one-cmds/tests/one-quantize_012.qconf.json new file mode 100644 index 0000000..4a15b04 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_012.qconf.json @@ -0,0 +1,16 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D", + "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool", + "InceptionV3/InceptionV3/Mixed_5b/concat", + "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool", + "InceptionV3/InceptionV3/Mixed_7c/concat", + "InceptionV3/Predictions/Reshape_1"], + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/one-cmds/tests/one-quantize_012.test b/compiler/one-cmds/tests/one-quantize_012.test new file mode 100644 index 0000000..fba18ac --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_012.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung 
Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_012.q.circle" + +rm -rf ${outputfile} + +# run test without input data +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--quant_config one-quantize_012.qconf.json \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_013.qconf.json b/compiler/one-cmds/tests/one-quantize_013.qconf.json new file mode 100644 index 0000000..4a15b04 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_013.qconf.json @@ -0,0 +1,16 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D", + "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool", + "InceptionV3/InceptionV3/Mixed_5b/concat", + "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool", + "InceptionV3/InceptionV3/Mixed_7c/concat", + "InceptionV3/Predictions/Reshape_1"], + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/one-cmds/tests/one-quantize_013.test b/compiler/one-cmds/tests/one-quantize_013.test new file mode 100644 index 0000000..fd443d6 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_013.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# quantized_dtype and granularity are given by qconfig file +# (not by command line interface) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_013.q.circle" + +rm -rf ${outputfile} + +# run test without input data +# quantized_dtype and granularity are not given here +one-quantize \ +--input_dtype float32 \ +--quant_config one-quantize_013.qconf.json \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_014.test b/compiler/one-cmds/tests/one-quantize_014.test new file mode 100644 index 0000000..518c328 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_014.test @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test if `circle-eval-diff` supports directory input. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_014.q.circle" +datadir="./raw_files/" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datadir} \ +--input_data_format dir \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datadir} \ +--print_top5_match > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_015.test b/compiler/one-cmds/tests/one-quantize_015.test new file mode 100644 index 0000000..bb45b57 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_015.test @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Test if --fake_quantize option works well + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.mat.q8.circle" +outputfile="./inception_v3.one-quantize_015.fq.circle" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--fake_quantize \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test index ac920a4..e182edf 100644 --- a/compiler/one-cmds/tests/one-quantize_neg_019.test +++ b/compiler/one-cmds/tests/one-quantize_neg_019.test @@ -42,7 +42,7 @@ one-quantize \ --input_dtype float32 \ --quantized_dtype int16 \ --granularity channel \ ---input_type float32 \ +--input_type float64 \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-quantize_neg_020.test b/compiler/one-cmds/tests/one-quantize_neg_020.test new file mode 100644 index 0000000..27b11c3 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_neg_020.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# check error message is printed when qconfig file is not json + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Failed to decode" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.quantized.neg_020.circle" + +rm -rf ${outputfile}.log + +# run test +one-quantize \ +--input_dtype float32 \ +--quant_config one-quantize_neg_020.test \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_008.cfg b/compiler/one-cmds/tests/onecc_008.cfg index 0be026e..020e274 100644 --- a/compiler/one-cmds/tests/onecc_008.cfg +++ b/compiler/one-cmds/tests/onecc_008.cfg @@ -15,7 +15,6 @@ output_path=test_onnx_model.circle [one-optimize] input_path=test_onnx_model.circle output_path=test_onnx_model.opt.circle -all=True remove_redundant_transpose=True [one-codegen] diff --git a/compiler/one-cmds/tests/onecc_009.cfg b/compiler/one-cmds/tests/onecc_009.cfg index a17ae59..86121c5 100644 --- a/compiler/one-cmds/tests/onecc_009.cfg +++ b/compiler/one-cmds/tests/onecc_009.cfg @@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle [one-optimize] input_path=onnx_conv2d_conv2d.circle output_path=onnx_conv2d_conv2d.opt.circle -all=True remove_redundant_transpose=True convert_nchw_to_nhwc=True diff --git a/compiler/one-cmds/tests/onecc_024.cfg b/compiler/one-cmds/tests/onecc_024.cfg new file mode 100644 index 0000000..7b4b1a8 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_024.cfg @@ -0,0 +1,22 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input 
+input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v1 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle +make_batchnorm_gamma_positive=False diff --git a/compiler/one-cmds/tests/onecc_024.test b/compiler/one-cmds/tests/onecc_024.test new file mode 100644 index 0000000..1f5daa1 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_024.test @@ -0,0 +1,77 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use `OONECC_024` optimization option + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +clean_envir() +{ + rm -rf ../optimization/OONECC_024.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi +} + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + clean_envir + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_024.cfg" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +if [ ! 
-d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + +cp OONECC_024.cfg ../optimization + +# run test +LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1 + +clean_envir + +if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/onecc_025.cfg b/compiler/one-cmds/tests/onecc_025.cfg new file mode 100644 index 0000000..4776ea8 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_025.cfg @@ -0,0 +1,20 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle diff --git a/compiler/one-cmds/tests/onecc_025.test b/compiler/one-cmds/tests/onecc_025.test new file mode 100644 index 0000000..396f40c --- /dev/null +++ b/compiler/one-cmds/tests/onecc_025.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-import-tf -> one-optimize with the configuration file that includes `onecc` section + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_001.cfg" +outputfile="inception_v3.opt.circle" + +# run test +onecc -C ${configfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_026.cfg b/compiler/one-cmds/tests/onecc_026.cfg new file mode 100644 index 0000000..c27a136 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_026.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=True +one-pack=False +one-codegen=False + +[one-quantize] +input_path=inception_v3.circle +output_path=inception_v3.onecc_026.q.circle +input_data=inception_v3_test_data.h5 +evaluate_result=True +test_data=inception_v3_test_data.h5 +print_mpeir=True diff --git a/compiler/one-cmds/tests/onecc_026.test b/compiler/one-cmds/tests/onecc_026.test new file mode 100644 index 0000000..84cfa41 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_026.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +configfile="onecc_026.cfg" +outputfile="inception_v3.onecc_026.q.circle" + +rm -rf ${outputfile} + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/onecc_027.cfg b/compiler/one-cmds/tests/onecc_027.cfg new file mode 100644 index 0000000..d3f6b5e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_027.cfg @@ -0,0 +1,15 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False +one-profile=False +one-infer=True + +[one-infer] +backend=dummy +command=test_onnx_model.bin diff --git a/compiler/one-cmds/tests/onecc_027.test b/compiler/one-cmds/tests/onecc_027.test new file mode 100644 index 0000000..e727359 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_027.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-infer + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-profile + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_027.cfg" + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +onecc -C ${configfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/onecc_028.test b/compiler/one-cmds/tests/onecc_028.test new file mode 100644 index 0000000..10ce158 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_028.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-optimize -> one-pack + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_028.workflow.json" +outputfile="inception_v3_pkg" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_028.workflow.json b/compiler/one-cmds/tests/onecc_028.workflow.json new file mode 100644 index 0000000..84bfd01 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_028.workflow.json @@ -0,0 +1,37 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE", + "PACK" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "PACK": { + "one-cmd": "one-pack", + "commands": { + "input_path": "inception_v3.opt.circle", + "output_path": "inception_v3_pkg" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_029.test b/compiler/one-cmds/tests/onecc_029.test new file mode 100644 index 0000000..9bab1a1 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_029.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tf -> one-quantize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_029.workflow.json" +outputfile="inception_v3.quantized.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_029.workflow.json b/compiler/one-cmds/tests/onecc_029.workflow.json new file mode 100644 index 0000000..65c9ea6 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_029.workflow.json @@ -0,0 +1,30 @@ +{ + "workflows": [ + "QUANTIZE_WORKFLOW" + ], + "QUANTIZE_WORKFLOW": { + "steps": [ + "IMPORT_TF", + "QUANTIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_030.test b/compiler/one-cmds/tests/onecc_030.test new file mode 100644 index 0000000..c0aa56a --- /dev/null +++ b/compiler/one-cmds/tests/onecc_030.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_030.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_030.workflow.json b/compiler/one-cmds/tests/onecc_030.workflow.json new file mode 100644 index 0000000..111a1b0 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_030.workflow.json @@ -0,0 +1,29 @@ +{ + "workflows": [ + "codegen_wf" + ], + "codegen_wf": { + "steps": [ + "import_tf", + "codegen" + ], + "import_tf": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_031.test b/compiler/one-cmds/tests/onecc_031.test new file mode 100644 index 0000000..7a1c670 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_031.test 
@@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tflite -> one-optimize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_031.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ !
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_031.workflow.json b/compiler/one-cmds/tests/onecc_031.workflow.json new file mode 100644 index 0000000..83d52b9 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_031.workflow.json @@ -0,0 +1,33 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "codegen" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_032.test b/compiler/one-cmds/tests/onecc_032.test new file mode 100644 index 0000000..89b6c41 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_032.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_032.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_032.workflow.json b/compiler/one-cmds/tests/onecc_032.workflow.json new file mode 100644 index 0000000..08d3f0f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_032.workflow.json @@ -0,0 +1,42 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "quantize", + "codegen" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "quantize": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.quantized.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_033.test b/compiler/one-cmds/tests/onecc_033.test new file mode 100644 index 0000000..635582f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_033.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd.
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-pack + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_033.workflow.json" +outputfile="inception_v3_pkg" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_033.workflow.json b/compiler/one-cmds/tests/onecc_033.workflow.json new file mode 100644 index 0000000..01233ff --- /dev/null +++ b/compiler/one-cmds/tests/onecc_033.workflow.json @@ -0,0 +1,42 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "quantize", + "pack" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "quantize": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + }, + "pack": { + "one-cmd": "one-pack", + "commands": { +
"input_path": "inception_v3.quantized.circle", + "output_path": "inception_v3_pkg" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_034.test b/compiler/one-cmds/tests/onecc_034.test new file mode 100644 index 0000000..e766548 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_034.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-onnx -> one-optimize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_034.workflow.json" +outputfile="onnx_conv2d_conv2d.bin" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_034.workflow.json b/compiler/one-cmds/tests/onecc_034.workflow.json new file mode 100644 index 0000000..bc3cbbf --- /dev/null +++ b/compiler/one-cmds/tests/onecc_034.workflow.json @@ -0,0 +1,35 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "codegen" + ], + "import": { + "one-cmd": "one-import-onnx", + "commands": { + "input_path": "onnx_conv2d_conv2d.onnx", + "output_path": "onnx_conv2d_conv2d.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "onnx_conv2d_conv2d.circle", + "output_path": "onnx_conv2d_conv2d.opt.circle", + "remove_redundant_transpose": "True", + "convert_nchw_to_nhwc": "True" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_035.test b/compiler/one-cmds/tests/onecc_035.test new file mode 100644 index 0000000..762cdd3 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_035.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tf generates intermediate files + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_035.workflow.json" +outputfile="inception_v3.alt.circle" +intermfile="inception_v3.alt.tflite" + +rm -rf ${outputfile} +rm -rf ${intermfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi +if [[ ! -s "${intermfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_035.workflow.json b/compiler/one-cmds/tests/onecc_035.workflow.json new file mode 100644 index 0000000..6abf1f3 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_035.workflow.json @@ -0,0 +1,22 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import" + ], + "import": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.alt.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v1", + "save_intermediate": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_036.test b/compiler/one-cmds/tests/onecc_036.test new file mode 100644 index 0000000..865255e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_036.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-onnx generates intermediate files + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_036.workflow.json" +outputfile="test_onnx_model.circle" +intermfile="test_onnx_model.tflite" + +rm -rf ${outputfile} +rm -rf ${intermfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi +if [[ ! -s "${intermfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_036.workflow.json b/compiler/one-cmds/tests/onecc_036.workflow.json new file mode 100644 index 0000000..5fa29ed --- /dev/null +++ b/compiler/one-cmds/tests/onecc_036.workflow.json @@ -0,0 +1,18 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import" + ], + "import": { + "one-cmd": "one-import-onnx", + "commands": { + "input_path": "test_onnx_model.onnx", + "output_path": "test_onnx_model.circle", + "save_intermediate": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_037.test b/compiler/one-cmds/tests/onecc_037.test new file mode 100644 index 0000000..52ea9e4 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_037.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-optimize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_037.workflow.json" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_037.workflow.json b/compiler/one-cmds/tests/onecc_037.workflow.json new file mode 100644 index 0000000..3317fb2 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_037.workflow.json @@ -0,0 +1,29 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "IMPORT", + "OPTIMIZE" + ], + "IMPORT": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_038.test b/compiler/one-cmds/tests/onecc_038.test new file mode 100644 index 0000000..6b8f7cf --- /dev/null +++ b/compiler/one-cmds/tests/onecc_038.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-quantize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_038.workflow.json" +outputfile="inception_v3.list.quantized.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_038.workflow.json b/compiler/one-cmds/tests/onecc_038.workflow.json new file mode 100644 index 0000000..5ac515d --- /dev/null +++ b/compiler/one-cmds/tests/onecc_038.workflow.json @@ -0,0 +1,31 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "IMPORT", + "QUANTIZE" + ], + "IMPORT": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.list.quantized.circle", + "input_data": "datalist.txt", + "input_data_format": "list" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_039.test 
b/compiler/one-cmds/tests/onecc_039.test new file mode 100644 index 0000000..7db9d90 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_039.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-quantize quantizes the model and evaluates the result + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +workflowfile="onecc_039.workflow.json" +outputfile="inception_v3.onecc_039.q.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/onecc_039.workflow.json b/compiler/one-cmds/tests/onecc_039.workflow.json new file mode 100644 index 0000000..55ef569 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_039.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_039.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", +
"test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_040.cfg b/compiler/one-cmds/tests/onecc_040.cfg new file mode 100644 index 0000000..4776ea8 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.cfg @@ -0,0 +1,20 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle diff --git a/compiler/one-cmds/tests/onecc_040.test b/compiler/one-cmds/tests/onecc_040.test new file mode 100644 index 0000000..2f75677 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow with cfg reference + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_040.workflow.json" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_040.workflow.json b/compiler/one-cmds/tests/onecc_040.workflow.json new file mode 100644 index 0000000..2d4119b --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.workflow.json @@ -0,0 +1,10 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "cfg-reference": { + "path": "onecc_040.cfg" + } + } +} diff --git a/compiler/one-cmds/tests/onecc_041.cfg b/compiler/one-cmds/tests/onecc_041.cfg new file mode 100644 index 0000000..16135f0 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3_without_opt.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 diff --git a/compiler/one-cmds/tests/onecc_041.test b/compiler/one-cmds/tests/onecc_041.test new file mode 100644 index 0000000..791dd12 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.test @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run multiple workflows + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Do inference of inception_v3_without_opt\.circle" "${filename}.log" && + grep -q "Do inference of inception_v3\.opt\.circle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +workflowfile="onecc_041.workflow.json" +outputfile1="inception_v3_without_opt.circle" +outputfile2="inception_v3.opt.circle" + +cp dummy-inferV2 ../bin/dummy-inferV2 + +rm -rf ${outputfile1} ${outputfile2} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +rm -rf ../bin/dummy-inferV2 + +if [[ ! -s "${outputfile1}" ]] || [[ ! -s "${outputfile2}" ]]; then + trap_err_onexit +fi + +check_message diff --git a/compiler/one-cmds/tests/onecc_041.workflow.json b/compiler/one-cmds/tests/onecc_041.workflow.json new file mode 100644 index 0000000..7dfc1c6 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.workflow.json @@ -0,0 +1,61 @@ +{ + "workflows": [ + "WITHOUT_OPT", + "WITH_OPT", + "INFER" + ], + "INFER": { + "run-after": [ + "WITHOUT_OPT", + "WITH_OPT" + ], + "steps": [ + "INFER1", + "INFER2" + ], + "INFER1": { + "one-cmd": "one-infer", + "commands" : { + "driver": "dummy-inferV2", + "command": "inception_v3_without_opt.circle" + } + }, + "INFER2": { + "one-cmd": "one-infer", + "commands": { + "driver": "dummy-inferV2", + "command": "inception_v3.opt.circle" + } + } + }, + "WITHOUT_OPT": { + "cfg-reference": { + "path": "onecc_041.cfg" + } + }, + "WITH_OPT": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { +
"one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } + +} diff --git a/compiler/one-cmds/tests/onecc_neg_009.test b/compiler/one-cmds/tests/onecc_neg_009.test new file mode 100644 index 0000000..54dd129 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_009.test @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Valid optimization option but invalid configuration file path + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + rm -rf ../optimization/OONECC_NEG_009.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi + if grep -q "Not found given configuration file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +if [ ! -d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + + +touch ../optimization/OONECC_NEG_009.cfg + +configfile=".." 
+ +# run test +onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_010.test b/compiler/one-cmds/tests/onecc_neg_010.test new file mode 100644 index 0000000..ddad5e6 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_010.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Invalid optimization option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Invalid optimization option" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile=".." 
+ +# run test +onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_011.cfg b/compiler/one-cmds/tests/onecc_neg_011.cfg new file mode 100644 index 0000000..b587324 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_011.cfg @@ -0,0 +1,13 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle +wrong_opt=True diff --git a/compiler/one-cmds/tests/onecc_neg_011.test b/compiler/one-cmds/tests/onecc_neg_011.test new file mode 100644 index 0000000..3f043a7 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_011.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# generate error for unrecognized optimization option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "following arguments are unrecognized" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_neg_011.cfg" + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_012.cfg b/compiler/one-cmds/tests/onecc_neg_012.cfg new file mode 100644 index 0000000..fdc73ef --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_012.cfg @@ -0,0 +1,15 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False +one-profile=False +one-infer=True + +[one-infer] +driver=dummy-infer +backend=dummy +command="dummy arguments" diff --git a/compiler/one-cmds/tests/onecc_neg_012.test b/compiler/one-cmds/tests/onecc_neg_012.test new file mode 100644 index 0000000..9feca5f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_012.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Check driver and backend option is mutually exclusive + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_neg_012.cfg" + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_013.test b/compiler/one-cmds/tests/onecc_neg_013.test new file mode 100644 index 0000000..0dd8a0f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_013.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with missing workflow file + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found given workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_013.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_014.test b/compiler/one-cmds/tests/onecc_neg_014.test new file mode 100644 index 0000000..2ed5dcb --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_014.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# invalid workflow file + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Invalid workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_014.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_014.workflow.json b/compiler/one-cmds/tests/onecc_neg_014.workflow.json new file mode 100644 index 0000000..8d4fd43 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_014.workflow.json @@ -0,0 +1,3 @@ +{ + INVALID JSON FILE +} diff --git a/compiler/one-cmds/tests/onecc_neg_015.test b/compiler/one-cmds/tests/onecc_neg_015.test new file mode 100644 index 0000000..079ba67 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_015.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found" "${filename}.log" && + grep -q "key in workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_015.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_015.workflow.json b/compiler/one-cmds/tests/onecc_neg_015.workflow.json new file mode 100644 index 0000000..4cb752e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_015.workflow.json @@ -0,0 +1,21 @@ +{ + "workflowsssssss": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_016.test b/compiler/one-cmds/tests/onecc_neg_016.test new file mode 100644 index 0000000..c52763f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_016.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found" "${filename}.log" && + grep -q "key listed in" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_016.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_016.workflow.json b/compiler/one-cmds/tests/onecc_neg_016.workflow.json new file mode 100644 index 0000000..c929cf3 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_016.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOWWWWW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_017.test b/compiler/one-cmds/tests/onecc_neg_017.test new file mode 100644 index 0000000..2f173d2 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_017.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow has neither steps nor cfg-reference key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each workflow should have either" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_017.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_017.workflow.json b/compiler/one-cmds/tests/onecc_neg_017.workflow.json new file mode 100644 index 0000000..22f1415 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_017.workflow.json @@ -0,0 +1,18 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_018.test b/compiler/one-cmds/tests/onecc_neg_018.test new file mode 100644 index 0000000..bc2297e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_018.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow has both steps and cfg-reference keys, which are exclusive + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "are exclusive key" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_018.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_018.workflow.json b/compiler/one-cmds/tests/onecc_neg_018.workflow.json new file mode 100644 index 0000000..58cb88e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_018.workflow.json @@ -0,0 +1,24 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "cfg-reference": { + "path": "/path/to/ini/format/file" + }, + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_019.test b/compiler/one-cmds/tests/onecc_neg_019.test new file mode 100644 index 0000000..11ef3a9 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_019.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each step should have" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_019.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_019.workflow.json b/compiler/one-cmds/tests/onecc_neg_019.workflow.json new file mode 100644 index 0000000..aedeeec --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_019.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmddddddddd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_020.test b/compiler/one-cmds/tests/onecc_neg_020.test new file mode 100644 index 0000000..7f5073d --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_020.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each step should have" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_020.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_020.workflow.json b/compiler/one-cmds/tests/onecc_neg_020.workflow.json new file mode 100644 index 0000000..d3446d3 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_020.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commandssssssssss": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_021.test b/compiler/one-cmds/tests/onecc_neg_021.test new file mode 100644 index 0000000..e9d4baa --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_021.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflows have a cycle + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Workflows should not have a cycle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_021.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_021.workflow.json b/compiler/one-cmds/tests/onecc_neg_021.workflow.json new file mode 100644 index 0000000..6d21111 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_021.workflow.json @@ -0,0 +1,44 @@ +{ + "workflows": [ + "CYCLE_WF1", + "CYCLE_WF2" + ], + "CYCLE_WF1": { + "run-after": [ + "CYCLE_WF2" + ], + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + }, + "CYCLE_WF2": { + "run-after": [ + "CYCLE_WF1" + ], + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": 
"1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_022.cfg b/compiler/one-cmds/tests/onecc_neg_022.cfg new file mode 100644 index 0000000..16135f0 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3_without_opt.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 diff --git a/compiler/one-cmds/tests/onecc_neg_022.test b/compiler/one-cmds/tests/onecc_neg_022.test new file mode 100644 index 0000000..5400717 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# workflows have a cycle + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Workflows should not have a cycle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_022.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_022.workflow.json b/compiler/one-cmds/tests/onecc_neg_022.workflow.json new file mode 100644 index 0000000..2e056ac --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.workflow.json @@ -0,0 +1,63 @@ +{ + "workflows": [ + "WITHOUT_OPT", + "WITH_OPT", + "INFER" + ], + "INFER": { + "run-after": [ + "WITHOUT_OPT", + "WITH_OPT" + ], + "steps": [ + "INFER1", + "INFER2" + ], + "INFER1": { + "one-cmd": "one-infer", + "commands" : { + "driver": "dummy-inferV2", + "command": "inception_v3_without_opt.circle" + } + }, + "INFER2": { + "one-cmd": "one-infer", + "commands": { + "driver": "dummy-inferV2", + "command": "inception_v3.opt.circle" + } + } + }, + "WITHOUT_OPT": { + "cfg-reference": { + "path": "onecc_041.cfg" + } + }, + "WITH_OPT": { + "run-after": [ + "WITHOUT_OPT" + ], + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_023.test b/compiler/one-cmds/tests/onecc_neg_023.test new file mode 100644 index 0000000..09717e8 --- /dev/null +++ 
b/compiler/one-cmds/tests/onecc_neg_023.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflows have wrong optimize option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Change outputs failed" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_023.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_023.workflow.json b/compiler/one-cmds/tests/onecc_neg_023.workflow.json new file mode 100644 index 0000000..056e704 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_023.workflow.json @@ -0,0 +1,30 @@ +{ + "workflows": [ + "WITH_OPT" + ], + "WITH_OPT": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle", + "change_outputs": 
"non_existing_node_name" + } + } + } +} diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index c80c598..c171cfe 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -91,6 +91,20 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444 fi +if [[ ! -s "reshape_matmul.onnx" ]]; then + rm -rf reshape_matmul.zip + wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip + unzip reshape_matmul.zip + # https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137 +fi + +if [[ ! -s "Net_InstanceNorm_003.part" ]]; then + rm -rf Net_InstanceNorm_003.zip + wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip + unzip Net_InstanceNorm_003.zip + # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257 +fi + function files_missing() { condition="test " diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py index be0322a..d204447 100644 --- a/compiler/one-cmds/utils.py +++ b/compiler/one-cmds/utils.py @@ -47,6 +47,25 @@ def _add_default_arg(parser): parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS) +def _add_default_arg_no_CS(parser): + """ + This adds -v -V args only (no -C nor -S) + """ + # version + parser.add_argument( + '-v', + '--version', + action='store_true', + help='show program\'s version number and exit') + + # verbose + parser.add_argument( + '-V', + '--verbose', + action='store_true', + help='output additional information to stdout or stderr') + + def is_accumulated_arg(arg, driver): if driver == "one-quantize": accumulables = [ @@ -62,6 +81,43 @@ def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) +class Command: + def __init__(self, driver, args, log_file): + self.cmd = [driver] + self.driver = driver + self.args = args + self.log_file = log_file + + # Add 
option if attrs are valid + # Option values are collected from self.args + def add_option_with_valid_args(self, option, attrs): + for attr in attrs: + if not _is_valid_attr(self.args, attr): + return self + self.cmd.append(option) + for attr in attrs: + self.cmd.append(getattr(self.args, attr)) + return self + + # Add option and values without any condition + def add_option_with_values(self, option, values): + self.cmd.append(option) + for value in values: + self.cmd.append(value) + return self + + # Add option with no argument (ex: --verbose) if attr is valid + def add_noarg_option_if_valid_arg(self, option, attr): + if _is_valid_attr(self.args, attr): + self.cmd.append(option) + return self + + # Run cmd and save logs + def run(self): + self.log_file.write((' '.join(self.cmd) + '\n').encode()) + _run(self.cmd, err_prefix=self.driver, logfile=self.log_file) + + def _parse_cfg_and_overwrite(config_path, section, args): """ parse given section of configuration file and set the values of args. 
@@ -153,8 +209,7 @@ def _run(cmd, err_prefix=None, logfile=None): err_prefix: prefix to be put before every stderr lines logfile: file stream to which both of stdout and stderr lines will be written """ - with subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p: + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p: import select inputs = set([p.stdout, p.stderr]) while inputs: diff --git a/compiler/onnx-tools/CMakeLists.txt b/compiler/onnx-tools/CMakeLists.txt index ac4500e..5935cdf 100644 --- a/compiler/onnx-tools/CMakeLists.txt +++ b/compiler/onnx-tools/CMakeLists.txt @@ -18,4 +18,10 @@ foreach(ONNX_TOOL IN ITEMS ${ONNX_TOOL_FILES}) add_custom_target(${ONNX_TOOL_TARGET} ALL DEPENDS ${ONNX_TOOL_BIN}) + install(FILES ${ONNX_TOOL_BIN} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION bin) + endforeach(ONNX_TOOL) diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 51fd9a3..96dfc86 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) unset(QUANTIZATION_CONFIG_VALUE_TEST) diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt index 31b9061..6363614 100644 --- a/compiler/record-minmax-conversion-test/CMakeLists.txt +++ b/compiler/record-minmax-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(RECORD_MINMAX_CONVERSION_TEST) macro(addTest NAME) diff --git a/compiler/record-minmax/driver/Driver.cpp b/compiler/record-minmax/driver/Driver.cpp index c9f1d0c..faa402f 100644 --- a/compiler/record-minmax/driver/Driver.cpp +++ 
b/compiler/record-minmax/driver/Driver.cpp @@ -34,62 +34,33 @@ int entry(const int argc, char **argv) arser::Arser arser( "Embedding min/max values of activations to the circle model for post-training quantization"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); - arser.add_argument("--input_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Input model filepath"); + arser.add_argument("--input_model").required(true).help("Input model filepath"); arser.add_argument("--input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath. If not given, record-minmax will run with randomly generated data. " "Note that the random dataset does not represent inference workload, leading to poor " "model accuracy."); - arser.add_argument("--output_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Output model filepath"); + arser.add_argument("--output_model").required(true).help("Output model filepath"); arser.add_argument("--min_percentile") - .nargs(1) .type(arser::DataType::FLOAT) .help("Record n'th percentile of min"); arser.add_argument("--max_percentile") - .nargs(1) .type(arser::DataType::FLOAT) .help("Record n'th percentile of max"); - arser.add_argument("--mode") - .nargs(1) - .type(arser::DataType::STR) - .help("Record mode. percentile (default) or moving_average"); + arser.add_argument("--mode").help("Record mode. percentile (default) or moving_average"); arser.add_argument("--input_data_format") - .nargs(1) - .type(arser::DataType::STR) .help("Input data format. 
h5/hdf5 (default) or list/filelist"); arser.add_argument("--generate_profile_data") .nargs(0) - .required(false) .default_value(false) .help("This will turn on profiling data generation."); diff --git a/compiler/record-minmax/include/RecordFunction.h b/compiler/record-minmax/include/RecordFunction.h index ba199d0..5b993e4 100644 --- a/compiler/record-minmax/include/RecordFunction.h +++ b/compiler/record-minmax/include/RecordFunction.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include namespace record_minmax diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp index 8288d3e..e6edbdc 100644 --- a/compiler/record-minmax/src/MinMaxObserver.cpp +++ b/compiler/record-minmax/src/MinMaxObserver.cpp @@ -18,6 +18,7 @@ #include +#include #include using DataType = luci_interpreter::DataType; @@ -75,7 +76,7 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node, // Reshape changes only shape of input tensor, efficiently is it a no-op. return; default: - throw std::runtime_error("Tensor's data type is not float"); + throw std::runtime_error("Tensor's data type is not float. 
" + node->name()); } } diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp index 10a1451..6dbf98d 100644 --- a/compiler/record-minmax/src/RecordMinMax.cpp +++ b/compiler/record-minmax/src/RecordMinMax.cpp @@ -186,7 +186,13 @@ void RecordMinMax::initialize(const std::string &input_model_path) throw std::runtime_error("Failed to verify circle '" + input_model_path + "'"); } - _module = luci::Importer().importModule(circle::GetModel(model_data.data())); + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + throw std::runtime_error("Failed to load '" + input_model_path + "'"); + } + + _module = luci::Importer().importModule(circle_model); if (_module == nullptr) { diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt index f57102f..8dcf4c2 100644 --- a/compiler/souschef/CMakeLists.txt +++ b/compiler/souschef/CMakeLists.txt @@ -1,13 +1,20 @@ nnas_find_package(Protobuf QUIET) +nnas_find_package(Fp16Source QUIET) if(NOT Protobuf_FOUND) message(STATUS "Build souschef: FAILED (missing Protobuf)") return() endif(NOT Protobuf_FOUND) +if(NOT Fp16Source_FOUND) + message(STATUS "Build souschef: FAILED (missing Fp16Source)") + return() +endif(NOT Fp16Source_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") add_library(souschef STATIC ${SOURCES}) set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(souschef PRIVATE ${Fp16Source_DIR}/include) target_include_directories(souschef PUBLIC include) target_link_libraries(souschef PUBLIC libprotobuf) diff --git a/compiler/souschef/include/souschef/Data/Explicit.h b/compiler/souschef/include/souschef/Data/Explicit.h index 7cbb773..434d0ec 100644 --- a/compiler/souschef/include/souschef/Data/Explicit.h +++ b/compiler/souschef/include/souschef/Data/Explicit.h @@ -96,6 +96,41 @@ template struct ExplicitDataChefFactory : public DataChefFactory } }; +class 
ExplicitFloat16DataChef final : public DataChef +{ +public: + ExplicitFloat16DataChef() + { + // DO NOTHING + } + +public: + std::vector generate(int32_t count) const override; + +public: + void insert(const float &value) { _values.emplace_back(value); } + +private: + // NOTE store values in float but will convert to uint16_t in generate() + std::vector _values; +}; + +struct ExplicitFloat16DataChefFactory : public DataChefFactory +{ + std::unique_ptr create(const Arguments &args) const + { + std::unique_ptr res{new ExplicitFloat16DataChef}; + + for (uint32_t n = 0; n < args.count(); ++n) + { + auto const value = to_number(args.value(n)); + res->insert(value); + } + + return std::move(res); + } +}; + } // namespace souschef #endif // __SOUSCHEF_DATA_EXPLICIT_H__ diff --git a/compiler/souschef/include/souschef/Data/Gaussian.h b/compiler/souschef/include/souschef/Data/Gaussian.h index 8093b4c..c9ac571 100644 --- a/compiler/souschef/include/souschef/Data/Gaussian.h +++ b/compiler/souschef/include/souschef/Data/Gaussian.h @@ -41,6 +41,22 @@ private: float _stddev; }; +class GaussianFloat16DataChef final : public DataChef +{ +public: + GaussianFloat16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev} + { + // DO NOTHING + } + +public: + std::vector generate(int32_t count) const override; + +private: + float _mean; + float _stddev; +}; + class GaussianInt32DataChef final : public DataChef { public: @@ -109,6 +125,11 @@ struct GaussianUint8DataChefFactory : public DataChefFactory std::unique_ptr create(const Arguments &args) const; }; +struct GaussianFloat16DataChefFactory : public DataChefFactory +{ + std::unique_ptr create(const Arguments &args) const; +}; + } // namespace souschef #endif // __SOUSCHEF_DATA_GAUSSIAN_H__ diff --git a/compiler/souschef/src/Explicit.cpp b/compiler/souschef/src/Explicit.cpp index eb36cb7..3278ae3 100644 --- a/compiler/souschef/src/Explicit.cpp +++ b/compiler/souschef/src/Explicit.cpp @@ -19,6 +19,8 @@ #include #include 
+#include + namespace souschef { @@ -74,4 +76,23 @@ void ExplicitDataChef::write_value(std::vector &res, int32 } } +std::vector ExplicitFloat16DataChef::generate(int32_t count) const +{ + std::vector res; + + for (uint32_t n = 0; n < count; ++n) + { + float const fvalue = (n < _values.size()) ? _values.at(n) : 0.0; + uint16_t const value = fp16_ieee_from_fp32_value(fvalue); + auto const arr = reinterpret_cast(&value); + + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) + { + res.emplace_back(arr[b]); + } + } + + return res; +} + } // namespace souschef diff --git a/compiler/souschef/src/Gaussian.cpp b/compiler/souschef/src/Gaussian.cpp index 32cbcff..53a62ca 100644 --- a/compiler/souschef/src/Gaussian.cpp +++ b/compiler/souschef/src/Gaussian.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace souschef { @@ -36,7 +38,7 @@ static std::vector generate_gaussian(int32_t count, float mean, float s std::vector res; constexpr float max_cap = std::numeric_limits::max(); - constexpr float min_cap = std::numeric_limits::min(); + constexpr float min_cap = std::numeric_limits::lowest(); for (uint32_t n = 0; n < count; ++n) { float raw_value = dist(rand); @@ -69,6 +71,34 @@ std::vector GaussianFloat32DataChef::generate(int32_t count) const return generate_gaussian(count, _mean, _stddev); } +std::vector GaussianFloat16DataChef::generate(int32_t count) const +{ + auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count(); + auto seed = static_cast(time_stamp); + + std::minstd_rand rand{static_cast(seed)}; + std::normal_distribution dist{_mean, _stddev}; + + std::vector res; + + constexpr float max_cap = 1e9; + constexpr float min_cap = -1e9; + for (uint32_t n = 0; n < count; ++n) + { + float raw_value = dist(rand); + const float capped_value = std::max(min_cap, std::min(max_cap, raw_value)); + const uint16_t value = fp16_ieee_from_fp32_value(capped_value); + auto const arr = reinterpret_cast(&value); + + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) 
+ { + res.emplace_back(arr[b]); + } + } + + return res; +} + std::vector GaussianInt32DataChef::generate(int32_t count) const { return generate_gaussian(count, _mean, _stddev); @@ -136,4 +166,17 @@ std::unique_ptr GaussianUint8DataChefFactory::create(const Arguments & return std::unique_ptr{new GaussianUint8DataChef{mean, stddev}}; } +std::unique_ptr GaussianFloat16DataChefFactory::create(const Arguments &args) const +{ + if (args.count() != 2) + { + throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"}; + } + + auto const mean = to_number(args.value(0)); + auto const stddev = to_number(args.value(1)); + + return std::unique_ptr{new GaussianFloat16DataChef{mean, stddev}}; +} + } // namespace souschef diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt index 27f2463..79a3987 100644 --- a/compiler/tf2circle-conversion-test/CMakeLists.txt +++ b/compiler/tf2circle-conversion-test/CMakeLists.txt @@ -128,6 +128,10 @@ list(APPEND TEST_DEPS "${TEST_CONFIG}") # This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + # Run tests add_test( NAME tf2circle_conversion_test diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt index 48b098e..83596fa 100644 --- a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt +++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt @@ -132,6 +132,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}") # Generate dependencies add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_test( NAME tf2circle_dredd_pb_test COMMAND diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt index 
789e585..427e575 100644 --- a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt +++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt @@ -175,6 +175,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}") # Generate dependencies add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_test( NAME tf2circle_dredd_pbtxt_test COMMAND diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt index 2fb8223..ad776a6 100644 --- a/compiler/tf2circle-model-test/CMakeLists.txt +++ b/compiler/tf2circle-model-test/CMakeLists.txt @@ -100,6 +100,10 @@ list(APPEND DEPS "${TEST_RUNNER_SCRIPT}") ### Generate dependencies add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + # NOTE This target is not built by default add_test( NAME tf2circle_model_test diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt index b75c507..ac9f14d 100644 --- a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt +++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt index 87cf783..95a296e 100644 --- a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt +++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt index 41974f7..a6c451e 100644 --- a/compiler/tf2tflite-value-pb-test/CMakeLists.txt +++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT 
ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt index 2e76e21..fde3e60 100644 --- a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt +++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt index 0b47393..97aa07f 100644 --- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt +++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nncc_find_resource(TensorFlowTests) # diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py index 6b578ad..2bcf553 100755 --- a/compiler/tf2tfliteV2/tf2tfliteV2.py +++ b/compiler/tf2tfliteV2/tf2tfliteV2.py @@ -110,6 +110,12 @@ def _get_parser(): type=str, help="Names of the output arrays, comma-separated.") + # experimental options + parser.add_argument( + "--experimental_disable_batchmatmul_unfold", + action="store_true", + help="Experimental disable BatchMatMul unfold") + # Set default value parser.set_defaults(model_format="graph_def") return parser @@ -228,6 +234,9 @@ def _v2_convert(flags): keras_model = tf.keras.models.load_model(flags.input_path) converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) + if flags.experimental_disable_batchmatmul_unfold: + converter._experimental_disable_batchmatmul_unfold = True + converter.allow_custom_ops = True converter.experimental_new_converter = True diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt index 9e1cb72..2c6e3a1 100644 --- a/compiler/tfl-inspect/CMakeLists.txt +++ b/compiler/tfl-inspect/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_tflite) +if(NOT 
TARGET mio_tflite280) return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite280) set(DRIVER "driver/Driver.cpp") diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp index 3e62e0f..8505ff4 100644 --- a/compiler/tfl-inspect/driver/Driver.cpp +++ b/compiler/tfl-inspect/driver/Driver.cpp @@ -35,7 +35,7 @@ int entry(int argc, char **argv) .nargs(0) .help("Dump Conv2D series weight operators in tflite file"); arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file"); - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect"); + arser.add_argument("tflite").help("TFLite file to inspect"); try { diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt index 2fba335..5bead5b 100644 --- a/compiler/tfl-verify/CMakeLists.txt +++ b/compiler/tfl-verify/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_tflite) +if(NOT TARGET mio_tflite280) return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite280) file(GLOB_RECURSE SOURCES "src/*.cpp") diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp index 6d18976..6234549 100644 --- a/compiler/tfl-verify/src/Driver.cpp +++ b/compiler/tfl-verify/src/Driver.cpp @@ -25,7 +25,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify"); + arser.add_argument("tflite").help("TFLite file path to verify"); try { diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt index 948b1ce..6205ac6 100644 --- a/compiler/tflchef/CMakeLists.txt +++ b/compiler/tflchef/CMakeLists.txt @@ -20,4 +20,9 @@ add_subdirectory(core) add_subdirectory(tflite) # Tools add_subdirectory(tools) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_subdirectory(tests) diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp 
index 200c71e..f4dd4b3 100644 --- a/compiler/tflchef/core/src/Convert.cpp +++ b/compiler/tflchef/core/src/Convert.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,6 +63,8 @@ tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value) { case tflchef::FLOAT32: return tflite::TensorType_FLOAT32; + case tflchef::FLOAT16: + return tflite::TensorType_FLOAT16; case tflchef::INT32: return tflite::TensorType_INT32; case tflchef::UINT8: @@ -164,3 +167,222 @@ as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb, throw std::runtime_error("Unknown SparseIndexVector type"); } + +// namespace sparsity code referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +template +FormatConverter::FormatConverter(const std::vector &shape, + const std::vector &traversal_order, + const std::vector &format, + const std::vector &block_size, + const std::vector &block_map) + : dense_shape_(shape), traversal_order_(traversal_order), block_size_(block_size), + block_map_(block_map) +{ + dense_size_ = 1; + int block_dim = 0; + blocked_shape_.resize(shape.size()); + format_.resize(shape.size() + block_map.size()); + for (int i = 0; i < shape.size(); i++) + { + format_[i] = format[traversal_order[i]]; + dense_size_ *= shape[i]; + if (block_dim < block_map.size() && block_map[block_dim] == i) + { + blocked_shape_[i] = shape[i] / block_size[block_dim]; + block_dim++; + } + else + { + blocked_shape_[i] = shape[i]; + } + } + + // Only dense blocks are supported. 
+ for (int i = 0; i < block_map.size(); i++) + { + format_[i + shape.size()] = kTfLiteDimDense; + } +} + +template bool FormatConverter::DenseToSparse(const T *src_data) +{ + int num_original_dims = dense_shape_.size(); + int num_block_dims = block_map_.size(); + int num_expanded_dims = num_original_dims + num_block_dims; + std::vector expanded_shape(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; i++) + { + if (i < num_original_dims) + { + expanded_shape[i] = blocked_shape_[i]; + } + else + { + expanded_shape[i] = block_size_[i - num_original_dims]; + } + } + + std::vector shape_offset(num_original_dims); + shape_offset[shape_offset.size() - 1] = 1; + for (int i = num_original_dims - 1; i > 0; --i) + { + shape_offset[i - 1] = shape_offset[i] * dense_shape_[i]; + } + + std::vector expanded_shape_offset(num_expanded_dims); + for (int i = 0; i < num_original_dims; ++i) + { + expanded_shape_offset[i] = shape_offset[i]; + } + for (int i = 0; i < num_block_dims; ++i) + { + int mapped_dim = block_map_[i]; + expanded_shape_offset[num_original_dims + i] = shape_offset[mapped_dim]; + expanded_shape_offset[mapped_dim] *= block_size_[i]; + } + + std::vector dst_ordered_offset(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; ++i) + { + dst_ordered_offset[i] = expanded_shape_offset[traversal_order_[i]]; + } + + std::vector dst_dim_has_nonzeroes(num_expanded_dims); + std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false); + std::vector inner_compressed_dim(num_expanded_dims); + int most_recent_compressed_dim = -1; + std::vector num_segments_of_next_compressed_dim(num_expanded_dims); + int segment_count = 1; + for (int i = num_expanded_dims - 1; i >= 0; --i) + { + inner_compressed_dim[i] = most_recent_compressed_dim; + if (format_[i] == kTfLiteDimSparseCSR) + { + most_recent_compressed_dim = i; + num_segments_of_next_compressed_dim[i] = segment_count; + segment_count = 1; + } + else + { + num_segments_of_next_compressed_dim[i] = -1; 
+ segment_count *= expanded_shape[traversal_order_[i]]; + } + } + + dim_metadata_.resize(num_expanded_dims * 2); + std::vector dst_sparse_dims; + dst_sparse_dims.reserve(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; ++i) + { + dim_metadata_[i * 2].clear(); + dim_metadata_[i * 2 + 1].clear(); + if (format_[i] == kTfLiteDimDense) + { + // If dimension is dense, just store the shape. + dim_metadata_[i * 2].push_back(expanded_shape[traversal_order_[i]]); + } + else + { + dim_metadata_[i * 2].push_back(0); // Segment array always begins with 0. + dst_sparse_dims.push_back(i); // Add dimension to the sparse list. + } + } + + // This algorithm assumes that the block size is small enough for all the + // elements to fit in cache, so the strided accesses from different traversal + // order and the write-first-erase-later strategy shouldn't be too slow + int dst_dim_idx = num_expanded_dims; + std::vector coordinate(num_expanded_dims, 0); + int dense_tensor_idx = 0; + while (dst_dim_idx >= 0) + { + if (dst_dim_idx == num_expanded_dims) + { + // We have a complete coordinate. Add the element to the value array if it + // is not zero, or if the last dimension is dense. + if (!IsZero(src_data[dense_tensor_idx])) + { + data_.push_back(src_data[dense_tensor_idx]); + // Mark all sparse dimensions that their current indices have nonzeroes. + for (auto dst_dim : dst_sparse_dims) + { + if (!dst_dim_has_nonzeroes[dst_dim]) + { + // Only add the index to the indices array if the current nonzero + // is the first nonzero of the block. 
+ dim_metadata_[2 * dst_dim + 1].push_back(coordinate[dst_dim]); + dst_dim_has_nonzeroes[dst_dim] = true; + } + } + } + else if (format_[num_expanded_dims - 1] == kTfLiteDimDense) + { + data_.push_back(src_data[dense_tensor_idx]); + } + --dst_dim_idx; + } + else + { + int original_dim_idx = traversal_order_[dst_dim_idx]; + int dim_size = expanded_shape[original_dim_idx]; + if (dst_dim_has_nonzeroes[dst_dim_idx]) + { + // If the previous block has nonzeroes, reset the flag to false since + // we have just moved to a new block. + dst_dim_has_nonzeroes[dst_dim_idx] = false; + } + else if (format_[dst_dim_idx] == kTfLiteDimSparseCSR) + { + // This block is empty. Delete unnecessary values if compressed. + int next_compressed_dim = inner_compressed_dim[dst_dim_idx]; + int erase_offset = dim_metadata_[2 * dst_dim_idx + 1].size() * + num_segments_of_next_compressed_dim[dst_dim_idx]; + if (next_compressed_dim >= 0) + { + auto &segments = dim_metadata_[2 * inner_compressed_dim[dst_dim_idx]]; + segments.erase(segments.begin() + 1 + erase_offset, segments.end()); + } + else + { + data_.erase(data_.begin() + erase_offset, data_.end()); + } + } + if (++coordinate[dst_dim_idx] < dim_size) + { + // The current dst_dim_idx is valid (not out of bound). + dense_tensor_idx += dst_ordered_offset[dst_dim_idx]; + ++dst_dim_idx; + } + else + { + // dst_dim_idx has reached its dim size. Update segment array and go + // back to incrementing the previous dimension (dst_dim_idx - 1). 
+ if (format_[dst_dim_idx] == kTfLiteDimSparseCSR) + { + dim_metadata_[2 * dst_dim_idx].push_back(dim_metadata_[2 * dst_dim_idx + 1].size()); + } + coordinate[dst_dim_idx] = -1; + dense_tensor_idx -= dst_ordered_offset[dst_dim_idx] * dim_size; + --dst_dim_idx; + } + } + } + + return true; +} + +template bool FormatConverter::IsZero(const T val) +{ + return (val == static_cast(0)); +} + +template class FormatConverter; +template class FormatConverter; // float16 + +} // namespace sparsity diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h index 45c93d2..6e910ea 100644 --- a/compiler/tflchef/core/src/Convert.h +++ b/compiler/tflchef/core/src/Convert.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,4 +35,52 @@ flatbuffers::Offset as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb, const ::tflchef::TensorSparsity_IndexVec &value); +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +// Storage format of each dimension in a sparse tensor. 
+typedef enum TfLiteDimensionType +{ + kTfLiteDimDense = 0, + kTfLiteDimSparseCSR, +} TfLiteDimensionType; + +template class FormatConverter +{ +public: + FormatConverter(const std::vector &shape, const std::vector &traversal_order, + const std::vector &format, + const std::vector &block_size = {}, + const std::vector &block_map = {}); + + bool DenseToSparse(const T *src_data); + + const std::vector &GetData() { return data_; } + const std::vector> &GetDimMetadata() { return dim_metadata_; } + +private: + bool IsZero(const T val); + +private: + std::vector dense_shape_; + std::vector blocked_shape_; + size_t dense_size_; + std::vector traversal_order_; + std::vector format_; + std::vector block_size_; + std::vector block_map_; + std::vector> dim_metadata_; + std::vector data_; +}; + +extern template class FormatConverter; +extern template class FormatConverter; // float16 + +} // namespace sparsity + #endif // __CONVERT_H__ diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def index c634c04..28a5b76 100644 --- a/compiler/tflchef/core/src/DataChef.def +++ b/compiler/tflchef/core/src/DataChef.def @@ -21,3 +21,7 @@ DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory) DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory) DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory) DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory) + +// FLOAT16 support for only gaussian, explicit for now +DATA_CHEF(FLOAT16, explicit, ExplicitFloat16DataChefFactory) +DATA_CHEF(FLOAT16, gaussian, GaussianFloat16DataChefFactory) diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index 93b9334..a788adc 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ b/compiler/tflchef/core/src/ModelChef.cpp @@ -92,6 +92,7 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type) static DataChefRegistry string; static DataChefRegistry boolean; static DataChefRegistry s16; + static 
DataChefRegistry fp16; switch (type) { @@ -101,6 +102,8 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type) return s64; case tflchef::FLOAT32: return fp32; + case tflchef::FLOAT16: + return fp16; case tflchef::UINT8: return u8; case tflchef::STRING: @@ -207,6 +210,41 @@ struct CookParams std::string noname; }; +std::vector> +make_dim_metadata_vec(flatbuffers::FlatBufferBuilder *flatbuffer_builder, int32_t dims_count, + const std::vector &traversal_order_vec, + const std::vector &format_vec, + const std::vector> &dim_metadata_src) +{ + // Build sparsity parameter. + std::vector> dim_metadata_vec(dims_count); + for (int32_t i = 0; i < dims_count; i++) + { + const int32_t metadata_idx = 2 * i; + if (format_vec[traversal_order_vec[i]] == sparsity::kTfLiteDimSparseCSR) + { + auto array_segments = + tflite::CreateInt32Vector(*flatbuffer_builder, + flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx])) + .Union(); + auto array_indices = + tflite::CreateInt32Vector( + *flatbuffer_builder, flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx + 1])) + .Union(); + dim_metadata_vec[i] = + tflite::CreateDimensionMetadata(*flatbuffer_builder, tflite::DimensionType_SPARSE_CSR, 0, + tflite::SparseIndexVector_Int32Vector, array_segments, + tflite::SparseIndexVector_Int32Vector, array_indices); + } + else + { + dim_metadata_vec[i] = tflite::CreateDimensionMetadata( + *flatbuffer_builder, tflite::DimensionType_DENSE, dim_metadata_src[metadata_idx][0]); + } + } + return dim_metadata_vec; +} + template std::map cook_graph(const T &graph, CookParams &cp) { LOGGER(l); @@ -271,6 +309,8 @@ template std::map cook_graph(const T &graph, assert(operand.has_type()); + flatbuffers::Offset sparsity_index; + flatbuffers::Offset> shape; std::vector dims; if (operand.has_shape()) @@ -298,16 +338,125 @@ template std::map cook_graph(const T &graph, // Create Data int32_t count = (element_count(dims) > 0) ? 
element_count(dims) : filler.arg_size(); auto data_vec = chef->generate(count); - auto data = flatbuffer_builder->CreateVector(data_vec); - // Create Buffer - tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; - buffer_builder.add_data(data); - auto buffer = buffer_builder.Finish(); + if (operand.has_make_sparse() && operand.make_sparse()) + { + assert(not operand.has_sparsity()); + assert(operand.has_shape()); + + const int32_t dims_count = dims.size(); + std::vector traversal_order_vec; + std::vector format_vec; + for (int32_t o = 0; o < dims_count; ++o) + traversal_order_vec.push_back(o); + for (int32_t o = 0; o < dims_count - 1; ++o) + format_vec.push_back(sparsity::kTfLiteDimDense); + format_vec.push_back(sparsity::kTfLiteDimSparseCSR); + + if (operand.type() == tflchef::FLOAT32) + { + ::sparsity::FormatConverter converter(dims, traversal_order_vec, format_vec); + converter.DenseToSparse(reinterpret_cast(data_vec.data())); + const auto &sparse_data = converter.GetData(); + + std::vector sparse_uint8; + for (int c = 0; c < sparse_data.size(); ++c) + { + const float value = sparse_data.at(c); + const uint8_t *arr = reinterpret_cast(&value); + for (uint32_t b = 0; b < sizeof(float); ++b) + { + sparse_uint8.emplace_back(arr[b]); + } + } + auto data = flatbuffer_builder->CreateVector(sparse_uint8); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; + buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); + + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + + // save SparsityParameters + auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec); + + // Create block map + std::vector block_map_vec{}; + auto block_map = flatbuffer_builder->CreateVector(block_map_vec); + + // Create dimension metadata + const auto &dim_metadata_src = converter.GetDimMetadata(); + auto dim_metadata_vec = + make_dim_metadata_vec(flatbuffer_builder.get(), 
dims_count, traversal_order_vec, + format_vec, dim_metadata_src); + auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec); + sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order, + block_map, dim_metadata); + } + else if (operand.type() == tflchef::FLOAT16) + { + ::sparsity::FormatConverter converter(dims, traversal_order_vec, format_vec); + converter.DenseToSparse(reinterpret_cast(data_vec.data())); + const auto &sparse_data = converter.GetData(); + + std::vector sparse_uint8; + for (int c = 0; c < sparse_data.size(); ++c) + { + const uint16_t value = sparse_data.at(c); + const uint8_t *arr = reinterpret_cast(&value); + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) + { + sparse_uint8.emplace_back(arr[b]); + } + } + auto data = flatbuffer_builder->CreateVector(sparse_uint8); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; + buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); + + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + + // save SparsityParameters + auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec); + + // Create block map + std::vector block_map_vec{}; + auto block_map = flatbuffer_builder->CreateVector(block_map_vec); + + // Create dimension metadata + const auto &dim_metadata_src = converter.GetDimMetadata(); + auto dim_metadata_vec = + make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec, + format_vec, dim_metadata_src); + auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec); + sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order, + block_map, dim_metadata); + } + else + { + throw std::runtime_error{"NYI: unsupported operand type"}; + } + } + else + { + auto data = flatbuffer_builder->CreateVector(data_vec); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; 
+ buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); - // Update Buffer Index & Vector - buffer_index = buffer_vec.size(); - buffer_vec.emplace_back(buffer); + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + } } else { @@ -384,8 +533,6 @@ template std::map cook_graph(const T &graph, quant_index = quant_builder.Finish(); } - flatbuffers::Offset sparsity_index; - if (operand.has_sparsity()) { const auto &sparsity = operand.sparsity(); diff --git a/compiler/tflchef/core/src/Op/Densify.cpp b/compiler/tflchef/core/src/Op/Densify.cpp new file mode 100644 index 0000000..63c4e20 --- /dev/null +++ b/compiler/tflchef/core/src/Op/Densify.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Densify.h" + +flatbuffers::Offset DensifyChef::value(flatbuffers::FlatBufferBuilder &fbb) const +{ + tflite::DensifyOptionsBuilder options_builder{fbb}; + + return options_builder.Finish().Union(); +} + +std::unique_ptr DensifyChefFactory::create(const tflchef::Operation *operation) const +{ + return std::unique_ptr{new DensifyChef{operation}}; +} diff --git a/compiler/tflchef/core/src/Op/Densify.h b/compiler/tflchef/core/src/Op/Densify.h new file mode 100644 index 0000000..f6af693 --- /dev/null +++ b/compiler/tflchef/core/src/Op/Densify.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OP_DENSIFY_H__ +#define __OP_DENSIFY_H__ + +#include "OpChef.h" + +class DensifyChef final : public OpChef +{ +public: + explicit DensifyChef(const tflchef::Operation *operation) : _operation{operation} + { + // DO NOTHING + } + +public: + tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DENSIFY; } + + tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DensifyOptions; } + + flatbuffers::Offset value(flatbuffers::FlatBufferBuilder &fbb) const override; + +private: + const tflchef::Operation *_operation; +}; + +struct DensifyChefFactory final : public OpChefFactory +{ + std::unique_ptr create(const tflchef::Operation *operation) const override; +}; + +#endif // __OP_DENSIFY_H__ diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def index beebd35..c19d00d 100644 --- a/compiler/tflchef/core/src/OpChef.def +++ b/compiler/tflchef/core/src/OpChef.def @@ -18,6 +18,7 @@ OP_CHEF(Ceil, CeilChefFactory) OP_CHEF(Concatenation, ConcatenationChefFactory) OP_CHEF(Conv2D, Conv2DChefFactory) OP_CHEF(Cos, CosChefFactory) +OP_CHEF(Densify, DensifyChefFactory) OP_CHEF(DepthToSpace, DepthToSpaceChefFactory) OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory) OP_CHEF(Dequantize, DequantizeChefFactory) diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h index 159019a..3cd3be5 100644 --- a/compiler/tflchef/core/src/OpChefs.h +++ b/compiler/tflchef/core/src/OpChefs.h @@ -31,6 +31,7 @@ #include "Op/Concatenation.h" #include "Op/Conv2D.h" #include "Op/Cos.h" +#include "Op/Densify.h" #include "Op/DepthToSpace.h" #include "Op/DepthwiseConv2D.h" #include "Op/Dequantize.h" diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto index 1abefaf..da4b692 100644 --- a/compiler/tflchef/proto/tflchef.proto +++ b/compiler/tflchef/proto/tflchef.proto @@ -15,6 +15,7 @@ package tflchef; // This enum value corresponds to 
TensorType in TensorFlow Lite schema enum TensorType { FLOAT32 = 0; + FLOAT16 = 1; INT32 = 2; UINT8 = 3; INT64 = 4; @@ -88,6 +89,12 @@ message Operand { optional TensorSparsity sparsity = 6; optional bool is_variable = 7 [default = false]; optional ShapeSignature shape_signature = 8; + // 'make_sparse' is to tell tflchef to make a sparse tensor + // as filling 'TensorSparsity' by hand can be difficult + // for now, last dimension will be SPARSE_CSR + // ex) shape [2, 3, 4] will have + // TraversalOrder [0, 1, 2] with [DENSE, DENSE, SPARSE_CSR] + optional bool make_sparse = 9 [default = false]; } // This enum value corresponds to Padding in TensorFlow Lite schema @@ -534,6 +541,10 @@ message FakeQuantOptions { optional bool narrow_range = 4 [default = false]; } +message DensifyOptions { + // NONE +} + message Operation { optional string type = 1; repeated string input = 2; @@ -650,6 +661,7 @@ message Operation { optional AddNOptions add_n_options = 207; optional MatMulOptions matmul_options = 208; optional MaxPoolWithArgmaxOptions max_pool_with_argmax_options = 209; + optional DensifyOptions densify_options = 210; // NOTE if there are more than two options with same type of Options // use the number not listed in the above reserve list } diff --git a/compiler/tflchef/tests/make_sparse/test.recipe b/compiler/tflchef/tests/make_sparse/test.recipe new file mode 100644 index 0000000..15cc93a --- /dev/null +++ b/compiler/tflchef/tests/make_sparse/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse" + type: FLOAT32 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "2" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "3" + } + make_sparse: true +} +operand { + name: "dense" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + 
type: "Densify" + input: "sparse" + output: "dense" +} +operation { + type: "Add" + input: "in" + input: "dense" + output: "out" + add_options { + activation: NONE + } +} +input: "in" +output: "out" diff --git a/compiler/tflchef/tests/make_sparse_f16/test.recipe b/compiler/tflchef/tests/make_sparse_f16/test.recipe new file mode 100644 index 0000000..5977a1d --- /dev/null +++ b/compiler/tflchef/tests/make_sparse_f16/test.recipe @@ -0,0 +1,54 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse16" + type: FLOAT16 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "2" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "3" + } + make_sparse: true +} +operand { + name: "dense16" + type: FLOAT16 + shape { dim: 4 dim: 4 } +} +operand { + name: "dense32" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + type: "Densify" + input: "sparse16" + output: "dense16" +} +operation { + type: "Dequantize" + input: "dense16" + output: "dense32" +} +operation { + type: "Add" + input: "in" + input: "dense32" + output: "out" + add_options { + activation: NONE + } +} +input: "in" +output: "out" diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt index 3c3352b..d9a20a2 100644 --- a/compiler/tflchef/tflite/CMakeLists.txt +++ b/compiler/tflchef/tflite/CMakeLists.txt @@ -3,6 +3,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_library(tflchef_tflite STATIC ${SOURCES}) target_include_directories(tflchef_tflite PUBLIC include) target_include_directories(tflchef_tflite PRIVATE src) +target_include_directories(tflchef_tflite PRIVATE src/Op/include) target_link_libraries(tflchef_tflite tflchef_proto) target_link_libraries(tflchef_tflite mio_tflite280) target_link_libraries(tflchef_tflite mio_tflite280_helper) diff --git 
a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp index f47e51d..2429876 100644 --- a/compiler/tflchef/tflite/src/Convert.cpp +++ b/compiler/tflchef/tflite/src/Convert.cpp @@ -35,8 +35,9 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type) return tflchef::BOOL; case tflite::TensorType_INT16: return tflchef::INT16; + case tflite::TensorType_FLOAT16: + return tflchef::FLOAT16; // TODO handle other types - // TensorType_FLOAT16 // TensorType_STRING // TensorType_COMPLEX64 default: diff --git a/compiler/tflchef/tflite/src/FillerHelper.cpp b/compiler/tflchef/tflite/src/FillerHelper.cpp index cf96d2e..1ac99ad 100644 --- a/compiler/tflchef/tflite/src/FillerHelper.cpp +++ b/compiler/tflchef/tflite/src/FillerHelper.cpp @@ -48,3 +48,18 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import) } } // namespace tflchef + +// helpers of common codes for filling inputs +namespace tflchef +{ + +void fill_two_inputs(const tflite::Operator *op, TFliteImport *import) +{ + const std::vector &inputs = as_index_vector(op->inputs()); + assert(inputs.size() == 2); + + fill_tensor_to_import(inputs[0], import); + fill_tensor_to_import(inputs[1], import); +} + +} // namespace tflchef diff --git a/compiler/tflchef/tflite/src/FillerHelper.h b/compiler/tflchef/tflite/src/FillerHelper.h index 053a5c1..e96ae73 100644 --- a/compiler/tflchef/tflite/src/FillerHelper.h +++ b/compiler/tflchef/tflite/src/FillerHelper.h @@ -28,4 +28,12 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import); } // namespace tflchef +// helpers of common codes for filling inputs +namespace tflchef +{ + +void fill_two_inputs(const tflite::Operator *op, TFliteImport *import); + +} // namespace tflchef + #endif // __FILLER_HELPER_H__ diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp index 3e880a6..23d3606 100644 --- a/compiler/tflchef/tflite/src/Op/Add.cpp +++ b/compiler/tflchef/tflite/src/Op/Add.cpp @@ -27,11 +27,7 
@@ void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import, { // Add may have constant input - const std::vector &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Maximum.cpp b/compiler/tflchef/tflite/src/Op/Maximum.cpp index d52caf0..65e4c2c 100644 --- a/compiler/tflchef/tflite/src/Op/Maximum.cpp +++ b/compiler/tflchef/tflite/src/Op/Maximum.cpp @@ -25,11 +25,7 @@ namespace tflchef void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - const std::vector &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Minimum.cpp b/compiler/tflchef/tflite/src/Op/Minimum.cpp index 6440f1d..b4d255c 100644 --- a/compiler/tflchef/tflite/src/Op/Minimum.cpp +++ b/compiler/tflchef/tflite/src/Op/Minimum.cpp @@ -25,11 +25,7 @@ namespace tflchef void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - const std::vector &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Mul.cpp b/compiler/tflchef/tflite/src/Op/Mul.cpp index 9faa4ac..1145ff7 100644 --- a/compiler/tflchef/tflite/src/Op/Mul.cpp +++ 
b/compiler/tflchef/tflite/src/Op/Mul.cpp @@ -27,11 +27,7 @@ void TFliteOpMul::filler(const tflite::Operator *op, TFliteImport *import, { // Mul may have constant input - const std::vector &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMul::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp index ad99219..4f096ce 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp @@ -38,7 +38,7 @@ void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImpor for (int32_t index = 2; index < 5; ++index) { - fill_tensor_to_import(index, import); + fill_tensor_to_import(inputs[index], import); } } diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp index db7f4c9..332cba0 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp @@ -41,7 +41,7 @@ void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImpor for (int32_t index = 2; index < 6; ++index) { - fill_tensor_to_import(index, import); + fill_tensor_to_import(inputs[index], import); } } diff --git a/compiler/tflchef/tflite/src/Op/PadV2.cpp b/compiler/tflchef/tflite/src/Op/PadV2.cpp index 0b1c9f3..a6b657f 100644 --- a/compiler/tflchef/tflite/src/Op/PadV2.cpp +++ b/compiler/tflchef/tflite/src/Op/PadV2.cpp @@ -16,6 +16,7 @@ #include "PadV2.h" +#include "Convert.h" #include "FillerHelper.h" namespace tflchef @@ -24,9 +25,11 @@ namespace tflchef void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { + const 
std::vector &inputs = as_index_vector(op->inputs()); + // Filler for paddings and constant_values - fill_tensor_to_import(1, import); - fill_tensor_to_import(2, import); + fill_tensor_to_import(inputs[1], import); + fill_tensor_to_import(inputs[2], import); } tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp index 548a09a..ec09a69 100644 --- a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp +++ b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp @@ -25,9 +25,11 @@ namespace tflchef void TFliteOpScatterNd::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { + const std::vector &inputs = as_index_vector(op->inputs()); + // Filler for indices and shape - fill_tensor_to_import(0, import); - fill_tensor_to_import(2, import); + fill_tensor_to_import(inputs[0], import); + fill_tensor_to_import(inputs[2], import); } tflchef::Operation *TFliteOpScatterNd::build(const tflite::Operator *, TFliteImport *, diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp index a975ca4..bc45a94 100644 --- a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp +++ b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp @@ -16,6 +16,7 @@ #include "SegmentSum.h" +#include "Convert.h" #include "FillerHelper.h" namespace tflchef @@ -24,8 +25,10 @@ namespace tflchef void TFliteOpSegmentSum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - // Filler for indices and shape - fill_tensor_to_import(1, import); + const std::vector &inputs = as_index_vector(op->inputs()); + + // Filler for segment_ids + fill_tensor_to_import(inputs[1], import); } tflchef::Operation *TFliteOpSegmentSum::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp 
b/compiler/tflchef/tflite/src/Op/Sub.cpp index 0a08bbf..584be0a 100644 --- a/compiler/tflchef/tflite/src/Op/Sub.cpp +++ b/compiler/tflchef/tflite/src/Op/Sub.cpp @@ -27,11 +27,7 @@ void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import, { // Sub may have constant input - const std::vector &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/include/Abs.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Abs.h rename to compiler/tflchef/tflite/src/Op/include/Abs.h diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/include/Add.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Add.h rename to compiler/tflchef/tflite/src/Op/include/Add.h diff --git a/compiler/tflchef/tflite/src/Op/AddN.h b/compiler/tflchef/tflite/src/Op/include/AddN.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/AddN.h rename to compiler/tflchef/tflite/src/Op/include/AddN.h diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/include/ArgMax.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ArgMax.h rename to compiler/tflchef/tflite/src/Op/include/ArgMax.h diff --git a/compiler/tflchef/tflite/src/Op/ArgMin.h b/compiler/tflchef/tflite/src/Op/include/ArgMin.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ArgMin.h rename to compiler/tflchef/tflite/src/Op/include/ArgMin.h diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/AveragePool2D.h rename to compiler/tflchef/tflite/src/Op/include/AveragePool2D.h 
diff --git a/compiler/tflchef/tflite/src/Op/BatchMatMul.h b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/BatchMatMul.h rename to compiler/tflchef/tflite/src/Op/include/BatchMatMul.h diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/BatchToSpaceND.h rename to compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h diff --git a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h rename to compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h diff --git a/compiler/tflchef/tflite/src/Op/Cast.h b/compiler/tflchef/tflite/src/Op/include/Cast.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Cast.h rename to compiler/tflchef/tflite/src/Op/include/Cast.h diff --git a/compiler/tflchef/tflite/src/Op/Ceil.h b/compiler/tflchef/tflite/src/Op/include/Ceil.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Ceil.h rename to compiler/tflchef/tflite/src/Op/include/Ceil.h diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/include/Concatenation.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Concatenation.h rename to compiler/tflchef/tflite/src/Op/include/Concatenation.h diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/include/Conv2D.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Conv2D.h rename to compiler/tflchef/tflite/src/Op/include/Conv2D.h diff --git a/compiler/tflchef/tflite/src/Op/Cos.h b/compiler/tflchef/tflite/src/Op/include/Cos.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Cos.h rename to 
compiler/tflchef/tflite/src/Op/include/Cos.h diff --git a/compiler/tflchef/tflite/src/Op/DepthToSpace.h b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/DepthToSpace.h rename to compiler/tflchef/tflite/src/Op/include/DepthToSpace.h diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h rename to compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h diff --git a/compiler/tflchef/tflite/src/Op/Dequantize.h b/compiler/tflchef/tflite/src/Op/include/Dequantize.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Dequantize.h rename to compiler/tflchef/tflite/src/Op/include/Dequantize.h diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/include/Div.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Div.h rename to compiler/tflchef/tflite/src/Op/include/Div.h diff --git a/compiler/tflchef/tflite/src/Op/ELU.h b/compiler/tflchef/tflite/src/Op/include/ELU.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ELU.h rename to compiler/tflchef/tflite/src/Op/include/ELU.h diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/include/Equal.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Equal.h rename to compiler/tflchef/tflite/src/Op/include/Equal.h diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/include/Exp.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Exp.h rename to compiler/tflchef/tflite/src/Op/include/Exp.h diff --git a/compiler/tflchef/tflite/src/Op/ExpandDims.h b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ExpandDims.h rename to compiler/tflchef/tflite/src/Op/include/ExpandDims.h diff --git 
a/compiler/tflchef/tflite/src/Op/FakeQuant.h b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/FakeQuant.h rename to compiler/tflchef/tflite/src/Op/include/FakeQuant.h diff --git a/compiler/tflchef/tflite/src/Op/Fill.h b/compiler/tflchef/tflite/src/Op/include/Fill.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Fill.h rename to compiler/tflchef/tflite/src/Op/include/Fill.h diff --git a/compiler/tflchef/tflite/src/Op/Floor.h b/compiler/tflchef/tflite/src/Op/include/Floor.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Floor.h rename to compiler/tflchef/tflite/src/Op/include/Floor.h diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/FloorDiv.h rename to compiler/tflchef/tflite/src/Op/include/FloorDiv.h diff --git a/compiler/tflchef/tflite/src/Op/FloorMod.h b/compiler/tflchef/tflite/src/Op/include/FloorMod.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/FloorMod.h rename to compiler/tflchef/tflite/src/Op/include/FloorMod.h diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/FullyConnected.h rename to compiler/tflchef/tflite/src/Op/include/FullyConnected.h diff --git a/compiler/tflchef/tflite/src/Op/Gather.h b/compiler/tflchef/tflite/src/Op/include/Gather.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Gather.h rename to compiler/tflchef/tflite/src/Op/include/Gather.h diff --git a/compiler/tflchef/tflite/src/Op/GatherNd.h b/compiler/tflchef/tflite/src/Op/include/GatherNd.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/GatherNd.h rename to compiler/tflchef/tflite/src/Op/include/GatherNd.h diff --git a/compiler/tflchef/tflite/src/Op/Greater.h 
b/compiler/tflchef/tflite/src/Op/include/Greater.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Greater.h rename to compiler/tflchef/tflite/src/Op/include/Greater.h diff --git a/compiler/tflchef/tflite/src/Op/GreaterEqual.h b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/GreaterEqual.h rename to compiler/tflchef/tflite/src/Op/include/GreaterEqual.h diff --git a/compiler/tflchef/tflite/src/Op/L2Normalize.h b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/L2Normalize.h rename to compiler/tflchef/tflite/src/Op/include/L2Normalize.h diff --git a/compiler/tflchef/tflite/src/Op/L2Pool2D.h b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/L2Pool2D.h rename to compiler/tflchef/tflite/src/Op/include/L2Pool2D.h diff --git a/compiler/tflchef/tflite/src/Op/LeakyRelu.h b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LeakyRelu.h rename to compiler/tflchef/tflite/src/Op/include/LeakyRelu.h diff --git a/compiler/tflchef/tflite/src/Op/Less.h b/compiler/tflchef/tflite/src/Op/include/Less.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Less.h rename to compiler/tflchef/tflite/src/Op/include/Less.h diff --git a/compiler/tflchef/tflite/src/Op/LessEqual.h b/compiler/tflchef/tflite/src/Op/include/LessEqual.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LessEqual.h rename to compiler/tflchef/tflite/src/Op/include/LessEqual.h diff --git a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h rename to compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h diff --git 
a/compiler/tflchef/tflite/src/Op/Log.h b/compiler/tflchef/tflite/src/Op/include/Log.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Log.h rename to compiler/tflchef/tflite/src/Op/include/Log.h diff --git a/compiler/tflchef/tflite/src/Op/LogSoftmax.h b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LogSoftmax.h rename to compiler/tflchef/tflite/src/Op/include/LogSoftmax.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalAnd.h b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LogicalAnd.h rename to compiler/tflchef/tflite/src/Op/include/LogicalAnd.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LogicalNot.h rename to compiler/tflchef/tflite/src/Op/include/LogicalNot.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/LogicalOr.h rename to compiler/tflchef/tflite/src/Op/include/LogicalOr.h diff --git a/compiler/tflchef/tflite/src/Op/Logistic.h b/compiler/tflchef/tflite/src/Op/include/Logistic.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Logistic.h rename to compiler/tflchef/tflite/src/Op/include/Logistic.h diff --git a/compiler/tflchef/tflite/src/Op/MatrixDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/MatrixDiag.h rename to compiler/tflchef/tflite/src/Op/include/MatrixDiag.h diff --git a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/MatrixSetDiag.h rename to compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h diff --git 
a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/MaxPool2D.h rename to compiler/tflchef/tflite/src/Op/include/MaxPool2D.h diff --git a/compiler/tflchef/tflite/src/Op/Maximum.h b/compiler/tflchef/tflite/src/Op/include/Maximum.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Maximum.h rename to compiler/tflchef/tflite/src/Op/include/Maximum.h diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/include/Mean.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Mean.h rename to compiler/tflchef/tflite/src/Op/include/Mean.h diff --git a/compiler/tflchef/tflite/src/Op/Minimum.h b/compiler/tflchef/tflite/src/Op/include/Minimum.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Minimum.h rename to compiler/tflchef/tflite/src/Op/include/Minimum.h diff --git a/compiler/tflchef/tflite/src/Op/MirrorPad.h b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/MirrorPad.h rename to compiler/tflchef/tflite/src/Op/include/MirrorPad.h diff --git a/compiler/tflchef/tflite/src/Op/Mul.h b/compiler/tflchef/tflite/src/Op/include/Mul.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Mul.h rename to compiler/tflchef/tflite/src/Op/include/Mul.h diff --git a/compiler/tflchef/tflite/src/Op/Neg.h b/compiler/tflchef/tflite/src/Op/include/Neg.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Neg.h rename to compiler/tflchef/tflite/src/Op/include/Neg.h diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h rename to compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h 
b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h rename to compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h diff --git a/compiler/tflchef/tflite/src/Op/NotEqual.h b/compiler/tflchef/tflite/src/Op/include/NotEqual.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/NotEqual.h rename to compiler/tflchef/tflite/src/Op/include/NotEqual.h diff --git a/compiler/tflchef/tflite/src/Op/OneHot.h b/compiler/tflchef/tflite/src/Op/include/OneHot.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/OneHot.h rename to compiler/tflchef/tflite/src/Op/include/OneHot.h diff --git a/compiler/tflchef/tflite/src/Op/PRelu.h b/compiler/tflchef/tflite/src/Op/include/PRelu.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/PRelu.h rename to compiler/tflchef/tflite/src/Op/include/PRelu.h diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/include/Pack.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Pack.h rename to compiler/tflchef/tflite/src/Op/include/Pack.h diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/include/Pad.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Pad.h rename to compiler/tflchef/tflite/src/Op/include/Pad.h diff --git a/compiler/tflchef/tflite/src/Op/PadV2.h b/compiler/tflchef/tflite/src/Op/include/PadV2.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/PadV2.h rename to compiler/tflchef/tflite/src/Op/include/PadV2.h diff --git a/compiler/tflchef/tflite/src/Op/Pow.h b/compiler/tflchef/tflite/src/Op/include/Pow.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Pow.h rename to compiler/tflchef/tflite/src/Op/include/Pow.h diff --git a/compiler/tflchef/tflite/src/Op/Quantize.h b/compiler/tflchef/tflite/src/Op/include/Quantize.h similarity index 100% rename from 
compiler/tflchef/tflite/src/Op/Quantize.h rename to compiler/tflchef/tflite/src/Op/include/Quantize.h diff --git a/compiler/tflchef/tflite/src/Op/Range.h b/compiler/tflchef/tflite/src/Op/include/Range.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Range.h rename to compiler/tflchef/tflite/src/Op/include/Range.h diff --git a/compiler/tflchef/tflite/src/Op/Rank.h b/compiler/tflchef/tflite/src/Op/include/Rank.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Rank.h rename to compiler/tflchef/tflite/src/Op/include/Rank.h diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/include/ReLU.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReLU.h rename to compiler/tflchef/tflite/src/Op/include/ReLU.h diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/include/ReLU6.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReLU6.h rename to compiler/tflchef/tflite/src/Op/include/ReLU6.h diff --git a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReLUN1To1.h rename to compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceAny.h b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReduceAny.h rename to compiler/tflchef/tflite/src/Op/include/ReduceAny.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceMax.h b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReduceMax.h rename to compiler/tflchef/tflite/src/Op/include/ReduceMax.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceMin.h b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReduceMin.h rename to 
compiler/tflchef/tflite/src/Op/include/ReduceMin.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceProd.h b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReduceProd.h rename to compiler/tflchef/tflite/src/Op/include/ReduceProd.h diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/include/Reshape.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Reshape.h rename to compiler/tflchef/tflite/src/Op/include/Reshape.h diff --git a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ResizeBilinear.h rename to compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h diff --git a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h rename to compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h diff --git a/compiler/tflchef/tflite/src/Op/ReverseSequence.h b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReverseSequence.h rename to compiler/tflchef/tflite/src/Op/include/ReverseSequence.h diff --git a/compiler/tflchef/tflite/src/Op/ReverseV2.h b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ReverseV2.h rename to compiler/tflchef/tflite/src/Op/include/ReverseV2.h diff --git a/compiler/tflchef/tflite/src/Op/Round.h b/compiler/tflchef/tflite/src/Op/include/Round.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Round.h rename to compiler/tflchef/tflite/src/Op/include/Round.h diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h similarity index 100% rename from 
compiler/tflchef/tflite/src/Op/Rsqrt.h rename to compiler/tflchef/tflite/src/Op/include/Rsqrt.h diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/include/SVDF.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SVDF.h rename to compiler/tflchef/tflite/src/Op/include/SVDF.h diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.h b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ScatterNd.h rename to compiler/tflchef/tflite/src/Op/include/ScatterNd.h diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.h b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SegmentSum.h rename to compiler/tflchef/tflite/src/Op/include/SegmentSum.h diff --git a/compiler/tflchef/tflite/src/Op/Select.h b/compiler/tflchef/tflite/src/Op/include/Select.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Select.h rename to compiler/tflchef/tflite/src/Op/include/Select.h diff --git a/compiler/tflchef/tflite/src/Op/SelectV2.h b/compiler/tflchef/tflite/src/Op/include/SelectV2.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SelectV2.h rename to compiler/tflchef/tflite/src/Op/include/SelectV2.h diff --git a/compiler/tflchef/tflite/src/Op/Shape.h b/compiler/tflchef/tflite/src/Op/include/Shape.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Shape.h rename to compiler/tflchef/tflite/src/Op/include/Shape.h diff --git a/compiler/tflchef/tflite/src/Op/Sin.h b/compiler/tflchef/tflite/src/Op/include/Sin.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Sin.h rename to compiler/tflchef/tflite/src/Op/include/Sin.h diff --git a/compiler/tflchef/tflite/src/Op/Slice.h b/compiler/tflchef/tflite/src/Op/include/Slice.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Slice.h rename to compiler/tflchef/tflite/src/Op/include/Slice.h diff --git 
a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/include/Softmax.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Softmax.h rename to compiler/tflchef/tflite/src/Op/include/Softmax.h diff --git a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SpaceToBatchND.h rename to compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h diff --git a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SpaceToDepth.h rename to compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h diff --git a/compiler/tflchef/tflite/src/Op/SparseToDense.h b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SparseToDense.h rename to compiler/tflchef/tflite/src/Op/include/SparseToDense.h diff --git a/compiler/tflchef/tflite/src/Op/Split.h b/compiler/tflchef/tflite/src/Op/include/Split.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Split.h rename to compiler/tflchef/tflite/src/Op/include/Split.h diff --git a/compiler/tflchef/tflite/src/Op/SplitV.h b/compiler/tflchef/tflite/src/Op/include/SplitV.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SplitV.h rename to compiler/tflchef/tflite/src/Op/include/SplitV.h diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/include/Sqrt.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Sqrt.h rename to compiler/tflchef/tflite/src/Op/include/Sqrt.h diff --git a/compiler/tflchef/tflite/src/Op/Square.h b/compiler/tflchef/tflite/src/Op/include/Square.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Square.h rename to compiler/tflchef/tflite/src/Op/include/Square.h diff --git 
a/compiler/tflchef/tflite/src/Op/SquaredDifference.h b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/SquaredDifference.h rename to compiler/tflchef/tflite/src/Op/include/SquaredDifference.h diff --git a/compiler/tflchef/tflite/src/Op/Squeeze.h b/compiler/tflchef/tflite/src/Op/include/Squeeze.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Squeeze.h rename to compiler/tflchef/tflite/src/Op/include/Squeeze.h diff --git a/compiler/tflchef/tflite/src/Op/StridedSlice.h b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/StridedSlice.h rename to compiler/tflchef/tflite/src/Op/include/StridedSlice.h diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/include/Sub.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Sub.h rename to compiler/tflchef/tflite/src/Op/include/Sub.h diff --git a/compiler/tflchef/tflite/src/Op/Sum.h b/compiler/tflchef/tflite/src/Op/include/Sum.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Sum.h rename to compiler/tflchef/tflite/src/Op/include/Sum.h diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/include/Tanh.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Tanh.h rename to compiler/tflchef/tflite/src/Op/include/Tanh.h diff --git a/compiler/tflchef/tflite/src/Op/Tile.h b/compiler/tflchef/tflite/src/Op/include/Tile.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Tile.h rename to compiler/tflchef/tflite/src/Op/include/Tile.h diff --git a/compiler/tflchef/tflite/src/Op/TopKV2.h b/compiler/tflchef/tflite/src/Op/include/TopKV2.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/TopKV2.h rename to compiler/tflchef/tflite/src/Op/include/TopKV2.h diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h 
b/compiler/tflchef/tflite/src/Op/include/Transpose.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Transpose.h rename to compiler/tflchef/tflite/src/Op/include/Transpose.h diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.h b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/TransposeConv.h rename to compiler/tflchef/tflite/src/Op/include/TransposeConv.h diff --git a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h rename to compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h diff --git a/compiler/tflchef/tflite/src/Op/Unique.h b/compiler/tflchef/tflite/src/Op/include/Unique.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Unique.h rename to compiler/tflchef/tflite/src/Op/include/Unique.h diff --git a/compiler/tflchef/tflite/src/Op/Unpack.h b/compiler/tflchef/tflite/src/Op/include/Unpack.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Unpack.h rename to compiler/tflchef/tflite/src/Op/include/Unpack.h diff --git a/compiler/tflchef/tflite/src/Op/Where.h b/compiler/tflchef/tflite/src/Op/include/Where.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/Where.h rename to compiler/tflchef/tflite/src/Op/include/Where.h diff --git a/compiler/tflchef/tflite/src/Op/ZerosLike.h b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h similarity index 100% rename from compiler/tflchef/tflite/src/Op/ZerosLike.h rename to compiler/tflchef/tflite/src/Op/include/ZerosLike.h diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h index b38b35a..1b9d420 100644 --- a/compiler/tflchef/tflite/src/TFliteOpChefs.h +++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h @@ -18,115 +18,115 @@ #define 
__TFLITE_OP_CHEFS_H__ // In alphabet order -#include "Op/Abs.h" -#include "Op/Add.h" -#include "Op/AddN.h" -#include "Op/ArgMax.h" -#include "Op/ArgMin.h" -#include "Op/AveragePool2D.h" -#include "Op/BatchMatMul.h" -#include "Op/BatchToSpaceND.h" -#include "Op/BidirectionalSequenceLSTM.h" -#include "Op/Cast.h" -#include "Op/Ceil.h" -#include "Op/Concatenation.h" -#include "Op/Conv2D.h" -#include "Op/Cos.h" -#include "Op/DepthToSpace.h" -#include "Op/DepthwiseConv2D.h" -#include "Op/Dequantize.h" -#include "Op/Div.h" -#include "Op/ELU.h" -#include "Op/Equal.h" -#include "Op/Exp.h" -#include "Op/ExpandDims.h" -#include "Op/FakeQuant.h" -#include "Op/Fill.h" -#include "Op/Floor.h" -#include "Op/FloorDiv.h" -#include "Op/FloorMod.h" -#include "Op/FullyConnected.h" -#include "Op/Gather.h" -#include "Op/GatherNd.h" -#include "Op/Greater.h" -#include "Op/GreaterEqual.h" -#include "Op/L2Normalize.h" -#include "Op/L2Pool2D.h" -#include "Op/LeakyRelu.h" -#include "Op/Less.h" -#include "Op/LessEqual.h" -#include "Op/LocalResponseNormalization.h" -#include "Op/Log.h" -#include "Op/LogicalAnd.h" -#include "Op/LogicalNot.h" -#include "Op/LogicalOr.h" -#include "Op/Logistic.h" -#include "Op/LogSoftmax.h" -#include "Op/MatrixDiag.h" -#include "Op/MatrixSetDiag.h" -#include "Op/Maximum.h" -#include "Op/MaxPool2D.h" -#include "Op/Mean.h" -#include "Op/Minimum.h" -#include "Op/MirrorPad.h" -#include "Op/Mul.h" -#include "Op/Neg.h" -#include "Op/NonMaxSuppressionV4.h" -#include "Op/NonMaxSuppressionV5.h" -#include "Op/NotEqual.h" -#include "Op/OneHot.h" -#include "Op/Pack.h" -#include "Op/Pad.h" -#include "Op/PadV2.h" -#include "Op/Pow.h" -#include "Op/PRelu.h" -#include "Op/Quantize.h" -#include "Op/Range.h" -#include "Op/Rank.h" -#include "Op/ReduceAny.h" -#include "Op/ReduceMax.h" -#include "Op/ReduceMin.h" -#include "Op/ReduceProd.h" -#include "Op/ReLU.h" -#include "Op/ReLU6.h" -#include "Op/ReLUN1To1.h" -#include "Op/Reshape.h" -#include "Op/ResizeBilinear.h" -#include 
"Op/ResizeNearestNeighbor.h" -#include "Op/ReverseSequence.h" -#include "Op/ReverseV2.h" -#include "Op/Round.h" -#include "Op/Rsqrt.h" -#include "Op/ScatterNd.h" -#include "Op/SegmentSum.h" -#include "Op/Select.h" -#include "Op/SelectV2.h" -#include "Op/Shape.h" -#include "Op/Sin.h" -#include "Op/Slice.h" -#include "Op/Softmax.h" -#include "Op/SpaceToBatchND.h" -#include "Op/SpaceToDepth.h" -#include "Op/SparseToDense.h" -#include "Op/Split.h" -#include "Op/SplitV.h" -#include "Op/Sqrt.h" -#include "Op/Square.h" -#include "Op/SquaredDifference.h" -#include "Op/Squeeze.h" -#include "Op/StridedSlice.h" -#include "Op/Sub.h" -#include "Op/Sum.h" -#include "Op/SVDF.h" -#include "Op/Tanh.h" -#include "Op/Tile.h" -#include "Op/TopKV2.h" -#include "Op/Transpose.h" -#include "Op/TransposeConv.h" -#include "Op/UnidirectionalSequenceLSTM.h" -#include "Op/Unique.h" -#include "Op/Unpack.h" -#include "Op/Where.h" -#include "Op/ZerosLike.h" +#include "Op/include/Abs.h" +#include "Op/include/Add.h" +#include "Op/include/AddN.h" +#include "Op/include/ArgMax.h" +#include "Op/include/ArgMin.h" +#include "Op/include/AveragePool2D.h" +#include "Op/include/BatchMatMul.h" +#include "Op/include/BatchToSpaceND.h" +#include "Op/include/BidirectionalSequenceLSTM.h" +#include "Op/include/Cast.h" +#include "Op/include/Ceil.h" +#include "Op/include/Concatenation.h" +#include "Op/include/Conv2D.h" +#include "Op/include/Cos.h" +#include "Op/include/DepthToSpace.h" +#include "Op/include/DepthwiseConv2D.h" +#include "Op/include/Dequantize.h" +#include "Op/include/Div.h" +#include "Op/include/ELU.h" +#include "Op/include/Equal.h" +#include "Op/include/Exp.h" +#include "Op/include/ExpandDims.h" +#include "Op/include/FakeQuant.h" +#include "Op/include/Fill.h" +#include "Op/include/Floor.h" +#include "Op/include/FloorDiv.h" +#include "Op/include/FloorMod.h" +#include "Op/include/FullyConnected.h" +#include "Op/include/Gather.h" +#include "Op/include/GatherNd.h" +#include "Op/include/Greater.h" 
+#include "Op/include/GreaterEqual.h" +#include "Op/include/L2Normalize.h" +#include "Op/include/L2Pool2D.h" +#include "Op/include/LeakyRelu.h" +#include "Op/include/Less.h" +#include "Op/include/LessEqual.h" +#include "Op/include/LocalResponseNormalization.h" +#include "Op/include/Log.h" +#include "Op/include/LogicalAnd.h" +#include "Op/include/LogicalNot.h" +#include "Op/include/LogicalOr.h" +#include "Op/include/Logistic.h" +#include "Op/include/LogSoftmax.h" +#include "Op/include/MatrixDiag.h" +#include "Op/include/MatrixSetDiag.h" +#include "Op/include/Maximum.h" +#include "Op/include/MaxPool2D.h" +#include "Op/include/Mean.h" +#include "Op/include/Minimum.h" +#include "Op/include/MirrorPad.h" +#include "Op/include/Mul.h" +#include "Op/include/Neg.h" +#include "Op/include/NonMaxSuppressionV4.h" +#include "Op/include/NonMaxSuppressionV5.h" +#include "Op/include/NotEqual.h" +#include "Op/include/OneHot.h" +#include "Op/include/Pack.h" +#include "Op/include/Pad.h" +#include "Op/include/PadV2.h" +#include "Op/include/Pow.h" +#include "Op/include/PRelu.h" +#include "Op/include/Quantize.h" +#include "Op/include/Range.h" +#include "Op/include/Rank.h" +#include "Op/include/ReduceAny.h" +#include "Op/include/ReduceMax.h" +#include "Op/include/ReduceMin.h" +#include "Op/include/ReduceProd.h" +#include "Op/include/ReLU.h" +#include "Op/include/ReLU6.h" +#include "Op/include/ReLUN1To1.h" +#include "Op/include/Reshape.h" +#include "Op/include/ResizeBilinear.h" +#include "Op/include/ResizeNearestNeighbor.h" +#include "Op/include/ReverseSequence.h" +#include "Op/include/ReverseV2.h" +#include "Op/include/Round.h" +#include "Op/include/Rsqrt.h" +#include "Op/include/ScatterNd.h" +#include "Op/include/SegmentSum.h" +#include "Op/include/Select.h" +#include "Op/include/SelectV2.h" +#include "Op/include/Shape.h" +#include "Op/include/Sin.h" +#include "Op/include/Slice.h" +#include "Op/include/Softmax.h" +#include "Op/include/SpaceToBatchND.h" +#include 
"Op/include/SpaceToDepth.h" +#include "Op/include/SparseToDense.h" +#include "Op/include/Split.h" +#include "Op/include/SplitV.h" +#include "Op/include/Sqrt.h" +#include "Op/include/Square.h" +#include "Op/include/SquaredDifference.h" +#include "Op/include/Squeeze.h" +#include "Op/include/StridedSlice.h" +#include "Op/include/Sub.h" +#include "Op/include/Sum.h" +#include "Op/include/SVDF.h" +#include "Op/include/Tanh.h" +#include "Op/include/Tile.h" +#include "Op/include/TopKV2.h" +#include "Op/include/Transpose.h" +#include "Op/include/TransposeConv.h" +#include "Op/include/UnidirectionalSequenceLSTM.h" +#include "Op/include/Unique.h" +#include "Op/include/Unpack.h" +#include "Op/include/Where.h" +#include "Op/include/ZerosLike.h" #endif // __TFLITE_OP_CHEFS_H__ diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp index d4605ce..f6c6789 100644 --- a/compiler/tflchef/tools/file/Driver.cpp +++ b/compiler/tflchef/tools/file/Driver.cpp @@ -28,10 +28,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("recipe") - .type(arser::DataType::STR) - .help("Source recipe file path to convert"); - arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path"); + arser.add_argument("recipe").help("Source recipe file path to convert"); + arser.add_argument("tflite").help("Target tflite file path"); try { diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp index 1451e8b..119bee6 100644 --- a/compiler/tflchef/tools/reverse/Driver.cpp +++ b/compiler/tflchef/tools/reverse/Driver.cpp @@ -25,10 +25,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("tflite") - .type(arser::DataType::STR) - .help("Source tflite file path to convert"); - arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path"); + arser.add_argument("tflite").help("Source tflite file path to convert"); + 
arser.add_argument("recipe").help("Target recipe file path"); try { diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt index fac0be6..4102326 100644 --- a/compiler/tfldump/CMakeLists.txt +++ b/compiler/tfldump/CMakeLists.txt @@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(tfldump ${DRIVER} ${SOURCES}) target_include_directories(tfldump PRIVATE include) target_link_libraries(tfldump arser) +target_link_libraries(tfldump foder) target_link_libraries(tfldump mio_tflite280) target_link_libraries(tfldump mio_tflite280_helper) target_link_libraries(tfldump safemain) diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp index 38c9c06..a3e748b 100644 --- a/compiler/tfldump/driver/Driver.cpp +++ b/compiler/tfldump/driver/Driver.cpp @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include @@ -23,7 +23,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to dump"); + arser.add_argument("tflite").help("TFLite file to dump"); try { @@ -38,14 +38,9 @@ int entry(int argc, char **argv) std::string tflite_path = arser.get("tflite"); // Load TF lite model from a tflite file - std::unique_ptr model = tflread::load_tflite(tflite_path); - if (model == nullptr) - { - std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl; - return 255; - } - - const tflite::Model *tflmodel = model->model(); + foder::FileLoader fileLoader{tflite_path}; + std::vector modelData = fileLoader.load(); + const tflite::Model *tflmodel = tflite::GetModel(modelData.data()); if (tflmodel == nullptr) { std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl; diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tfldump/include/tflread/Model.h deleted file mode 100644 index c6e4a94..0000000 --- a/compiler/tfldump/include/tflread/Model.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * 
Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __TFLREAD_MODEL_H__ -#define __TFLREAD_MODEL_H__ - -#include - -#include - -namespace tflread -{ - -struct Model -{ - virtual ~Model() = default; - - virtual const ::tflite::Model *model(void) const = 0; -}; - -/** - * @brief Load TensorFlow Lite model (as a raw Model) from a given path - * - * @note May return a nullptr - */ -std::unique_ptr load_tflite(const std::string &path); - -} // namespace tflread - -#endif // __TFLREAD_MODEL_H__ diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake index b1abf94..a11f6b2 100644 --- a/compiler/tfldump/requires.cmake +++ b/compiler/tfldump/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-tflite280") require("safemain") diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 2a87e47..4388fcd 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -33,7 +33,7 @@ void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t am std::ios_base::fmtflags saveflags(os.flags()); bool second = false; - bool ellipsis = amount > 0 && size > 4; + bool ellipsis = amount > 0 && size > 8; size_t count = ellipsis ? 
std::min(size, amount) : size; for (size_t i = 0; i < count; i++) @@ -103,8 +103,8 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector *fbvect) if (fbvect == nullptr) return os; - bool ellipsis = (fbvect->size() > 4); - auto limit_size = ellipsis ? 4 : fbvect->size(); + bool ellipsis = (fbvect->size() > 8); + auto limit_size = ellipsis ? 8 : fbvect->size(); if (ellipsis) { diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp deleted file mode 100644 index d2f6e06..0000000 --- a/compiler/tfldump/src/Load.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include - -namespace -{ - -class MemoryMappedModel final : public tflread::Model -{ -public: - /** - * @require fd and data SHOULD be valid - */ - explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size} - { - // DO NOTHING - } - -public: - ~MemoryMappedModel() - { - munmap(_data, _size); - close(_fd); - } - -public: - MemoryMappedModel(const MemoryMappedModel &) = delete; - MemoryMappedModel(MemoryMappedModel &&) = delete; - -public: - const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); } - -private: - int _fd = -1; - void *_data = nullptr; - size_t _size = 0; -}; - -class FileDescriptor final -{ -public: - FileDescriptor(int value) : _value{value} - { - // DO NOTHING - } - -public: - // NOTE Copy is not allowed - FileDescriptor(const FileDescriptor &) = delete; - -public: - // NOTE Move is allowed - FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); } - -public: - ~FileDescriptor() - { - if (_value != -1) - { - // Close on destructor - close(_value); - } - } - -public: - int value(void) const { return _value; } - -public: - int release(void) - { - auto res = _value; - _value = -1; - return res; - } - -private: - int _value = -1; -}; - -} // namespace - -namespace tflread -{ - -std::unique_ptr load_tflite(const std::string &path) -{ - FileDescriptor fd = open(path.c_str(), O_RDONLY); - - if (fd.value() == -1) - { - // Return nullptr on open failure - return nullptr; - } - - struct stat st; - if (fstat(fd.value(), &st) == -1) - { - // Return nullptr on fstat failure - return nullptr; - } - - auto size = st.st_size; - auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0); - - if (data == MAP_FAILED) - { - // Return nullptr on mmap failure - return nullptr; - } - - return std::unique_ptr{new MemoryMappedModel(fd.release(), data, size)}; -} - -} // namespace tflread diff --git a/compiler/tfldump/src/OpPrinter.cpp 
b/compiler/tfldump/src/OpPrinter.cpp index 47edcb0..2e8e713 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -736,6 +736,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for CEIL _op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique(); _op_map[tflite::BuiltinOperator_CONV_2D] = make_unique(); + // There is no Option for DENSIFY _op_map[tflite::BuiltinOperator_DEPTH_TO_SPACE] = make_unique(); _op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique(); // There is no Option for DEQUANTIZE diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt index 83fe23a..2e67d48 100644 --- a/compiler/tflite2circle-conversion-test/CMakeLists.txt +++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp index fb8c211..6afe1b0 100644 --- a/compiler/tflite2circle/driver/Driver.cpp +++ b/compiler/tflite2circle/driver/Driver.cpp @@ -36,24 +36,11 @@ int entry(int argc, char **argv) { arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"}; - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); - - arser.add_argument("tflite") - .nargs(1) - .type(arser::DataType::STR) - .help("Source tflite file path to convert"); - arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); + + 
arser.add_argument("tflite").help("Source tflite file path to convert"); + arser.add_argument("circle").help("Target circle file path"); try { diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h index 88a4f71..8149197 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions.h +++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h @@ -31,8 +31,10 @@ #include "BuildBuiltinOptions/ConcatenationOptions.h" #include "BuildBuiltinOptions/Conv2DOptions.h" #include "BuildBuiltinOptions/CosOptions.h" +#include "BuildBuiltinOptions/DensifyOptions.h" #include "BuildBuiltinOptions/DepthToSpaceOptions.h" #include "BuildBuiltinOptions/DepthwiseConv2DOptions.h" +#include "BuildBuiltinOptions/DequantizeOptions.h" #include "BuildBuiltinOptions/DivOptions.h" #include "BuildBuiltinOptions/EqualOptions.h" #include "BuildBuiltinOptions/ExpandDimsOptions.h" diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp new file mode 100644 index 0000000..4e58635 --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DensifyOptions.h" + +namespace tflite2circle +{ + +flatbuffers::Offset +build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *) +{ + circle::DensifyOptionsBuilder builtin_options_builder{fb}; + return builtin_options_builder.Finish(); +} + +} // namespace tflite2circle diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h new file mode 100644 index 0000000..b6126c4 --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BBO_DENSIFY_OPTIONS_H__ +#define __BBO_DENSIFY_OPTIONS_H__ + +#include +#include + +namespace tflite2circle +{ + +flatbuffers::Offset +build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op); + +} // namespace tflite2circle + +#endif // __BBO_DENSIFY_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp new file mode 100644 index 0000000..eeacece --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DequantizeOptions.h" +#include "DataLookup.h" + +namespace tflite2circle +{ + +flatbuffers::Offset +build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op) +{ + circle::DequantizeOptionsBuilder builtin_options_builder{fb}; + return builtin_options_builder.Finish(); +} + +} // namespace tflite2circle diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h new file mode 100644 index 0000000..1cb9f9c --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BBO_DEQUANTIZE_OPTIONS_H__ +#define __BBO_DEQUANTIZE_OPTIONS_H__ + +#include +#include + +namespace tflite2circle +{ + +flatbuffers::Offset +build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op); + +} // namespace tflite2circle + +#endif // __BBO_DEQUANTIZE_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp index d2d2888..db88d3e 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp @@ -25,8 +25,6 @@ namespace tflite2circle flatbuffers::Offset build_circle_MaximumMinimumOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op) { - auto tflite_builtin_options = op->builtin_options_as_MaximumMinimumOptions(); - assert(tflite_builtin_options); circle::MaximumMinimumOptionsBuilder builtin_options_builder{fb}; return builtin_options_builder.Finish(); } diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index d483b28..ac017b8 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -344,8 +344,13 @@ template <> void Offset::build(const TFLFlatBufVec *tflite_fla circle::OperatorCodeBuilder operator_code_builder{*_fb}; auto de_code = it->deprecated_builtin_code(); auto bt_code = it->builtin_code(); - operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code)); - operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code)); + auto cir_de_code = get_circle_builtin_code(de_code); + auto cir_bt_code = get_circle_builtin_code(bt_code); + // correct bt_code where bt_code == 0 for old tflite format + if (cir_bt_code == 0) + cir_bt_code = static_cast(cir_de_code); + operator_code_builder.add_deprecated_builtin_code(cir_de_code); + 
operator_code_builder.add_builtin_code(cir_bt_code); operator_code_builder.add_custom_code(custom_code); operator_code_builder.add_version(it->version()); auto code = operator_code_builder.Finish(); diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst index d55ba46..9cbf803 100644 --- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst +++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst @@ -42,7 +42,7 @@ TFL_BUILTIN_OPTIONS(TopKV2Options) TFL_BUILTIN_OPTIONS(SplitOptions) TFL_BUILTIN_OPTIONS(LogSoftmaxOptions) TFL_BUILTIN_OPTIONS(CastOptions) -//TFL_BUILTIN_OPTIONS(DequantizeOptions) +TFL_BUILTIN_OPTIONS(DequantizeOptions) TFL_BUILTIN_OPTIONS(MaximumMinimumOptions) TFL_BUILTIN_OPTIONS(ArgMaxOptions) TFL_BUILTIN_OPTIONS(LessOptions) @@ -106,3 +106,4 @@ TFL_BUILTIN_OPTIONS(RankOptions) TFL_BUILTIN_OPTIONS(ScatterNdOptions) TFL_BUILTIN_OPTIONS(SegmentSumOptions) TFL_BUILTIN_OPTIONS(BatchMatMulOptions) +TFL_BUILTIN_OPTIONS(DensifyOptions) diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 3841a1b..93c33cd 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000000140001) + set(VCONONE_VERSION 0x0000000000150001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option diff --git a/compiler/vconone/src/version.cpp b/compiler/vconone/src/version.cpp index d94a7ad..cebf7d9 100644 --- a/compiler/vconone/src/version.cpp +++ b/compiler/vconone/src/version.cpp @@ -54,7 +54,7 @@ std::string get_string(void) std::string get_copyright(void) { std::string str; - str = "Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All Rights Reserved\r\n"; + str = "Copyright (c) 2020-2022 Samsung Electronics Co., Ltd. 
All Rights Reserved\r\n"; str += "Licensed under the Apache License, Version 2.0\r\n"; str += "https://github.com/Samsung/ONE"; return str; diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt index 58f558d..c8d12c2 100644 --- a/compute/ARMComputeEx/CMakeLists.txt +++ b/compute/ARMComputeEx/CMakeLists.txt @@ -14,7 +14,7 @@ file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp") # generate embeded cl_kernel execute_process ( WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - COMMAND bash -c "python resolve_includes.py" + COMMAND bash -c "python3 resolve_includes.py" ) add_library(arm_compute_ex SHARED ${ACL_EX_SRCS}) diff --git a/compute/cker/CMakeLists.txt b/compute/cker/CMakeLists.txt index 09f6725..9b3cd4f 100644 --- a/compute/cker/CMakeLists.txt +++ b/compute/cker/CMakeLists.txt @@ -17,3 +17,20 @@ target_include_directories(nnfw_lib_cker INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/i # Workaround to avoid warning # TODO Resolve warning target_compile_options(nnfw_lib_cker INTERFACE -Wno-attributes) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +set(TEST_CKER test_cker) + +file(GLOB_RECURSE TESTS "src/*.test.cc") + +add_executable(${TEST_CKER} ${TESTS}) + +target_link_libraries(${TEST_CKER} nnfw_lib_cker) +target_link_libraries(${TEST_CKER} nnfw_coverage) +target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD}) + +add_test(${TEST_CKER} ${TEST_CKER}) +install(TARGETS ${TEST_CKER} DESTINATION unittest_standalone) diff --git a/compute/cker/include/cker/CpuBackendThreadpool.h b/compute/cker/include/cker/CpuBackendThreadpool.h index cc6a9db..8ec6140 100644 --- a/compute/cker/include/cker/CpuBackendThreadpool.h +++ b/compute/cker/include/cker/CpuBackendThreadpool.h @@ -21,6 +21,8 @@ #include // from @ruy #include // from @ruy +#include + namespace nnfw { namespace cker @@ -33,7 +35,12 @@ using Task = ruy::Task; template void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context) { + assert(ruy_context != 
nullptr); assert(tasks_count <= ruy_context->max_num_threads()); + if (ruy_context == nullptr) + { + throw std::runtime_error("CpuBackendThreadpool.h: ruy::Context is null"); + } ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks); } diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h index 8bf0bee..45ad969 100644 --- a/compute/cker/include/cker/NeonTensorUtils.h +++ b/compute/cker/include/cker/NeonTensorUtils.h @@ -632,7 +632,7 @@ inline void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias, ruy_support::MakeRuyMatrix(rhs_params, input, &ruy_rhs, true); ruy_support::MakeRuyMatrix(dst_params, scratch, &ruy_dst); - ruy::BasicSpec ruy_mul_params; + ruy::MulParams ruy_mul_params; ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params); ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst); diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h index 16c937a..7cd54dc 100644 --- a/compute/cker/include/cker/operation/Conv.h +++ b/compute/cker/include/cker/operation/Conv.h @@ -57,9 +57,9 @@ class Conv public: Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {} - void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type, - bool &is_replaced_weights, uint32_t dilationWidthFactor, - uint32_t dilationHeightFactor) + void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type, + bool &is_replaced_weights, uint32_t dilationWidthFactor, + uint32_t dilationHeightFactor) { if (!_prepared) { @@ -71,9 +71,9 @@ public: } } - void prepareQuant(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape, - uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor, - uint32_t dilation_height_factor) + void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape, + const Shape &output_shape, 
uint32_t stride_width, uint32_t stride_height, + uint32_t dilation_width_factor, uint32_t dilation_height_factor) { if (!_prepared) { @@ -138,13 +138,25 @@ public: } } + void operator()(const ConvParams ¶ms, const Shape &input_shape, const uint8_t *input_data, + const Shape &filter_shape, const uint8_t *filter_data, + const int32_t *filter_zero_point, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data) + { + reference::Conv(params, _per_channel_output_multiplier.data(), + _per_channel_output_shift.data(), input_shape, input_data, + filter_shape, filter_data, filter_zero_point, bias_shape, + bias_data, output_shape, output_data); + } + void operator()(const ConvParams ¶ms, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, int8_t *output_data) { - reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(), - input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, - output_shape, output_data); + reference::Conv(params, _per_channel_output_multiplier.data(), + _per_channel_output_shift.data(), input_shape, input_data, + filter_shape, filter_data, nullptr /* filter_zero_point */, + bias_shape, bias_data, output_shape, output_data); } std::vector &per_channel_output_multiplier() { return _per_channel_output_multiplier; } std::vector &per_channel_output_shift() { return _per_channel_output_shift; } diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h index 06ee780..ed1f93d 100644 --- a/compute/cker/include/cker/operation/DepthwiseConv.h +++ b/compute/cker/include/cker/operation/DepthwiseConv.h @@ -25,6 +25,7 @@ #include "cker/operation/optimized/DepthwiseConvFloat.h" #include "cker/operation/optimized/DepthwiseConvUint8.h" #include 
"cker/operation/optimized/integer_ops/DepthwiseConvInt8.h" +#include "cker/operation/reference/integer_ops/DepthwiseConvUInt8.h" #include "cker/CpuBackendThreadpool.h" namespace nnfw diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h index 4474754..8bfd469 100644 --- a/compute/cker/include/cker/operation/reference/Conv.h +++ b/compute/cker/include/cker/operation/reference/Conv.h @@ -190,10 +190,13 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8 } } +template inline void Conv(const ConvParams ¶ms, const int32_t *output_multiplier, - const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data, - const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, - const int32_t *bias_data, const Shape &output_shape, int8_t *output_data) + const int32_t *output_shift, const Shape &input_shape, const T *input_data, + const Shape &filter_shape, const T *filter_data, const int32_t *filter_zeropoint, + const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, + T *output_data) + { UNUSED_RELEASE(bias_shape); // Get parameters. @@ -259,26 +262,35 @@ inline void Conv(const ConvParams ¶ms, const int32_t *output_multiplier, for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = + const T input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; + const T filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; - // Accumulate with 32 bits accumulator. - // In the nudging process during model quantization, we force - // real value of 0.0 be represented by a quantized value. This - // guarantees that the input_offset is a int8_t, even though - // it is represented using int32_t. 
int32_t += int8_t * - // (int8_t - int8_t) so the highest value we can get from each - // accumulation is [-127, 127] * ([-128, 127] - - // [-128, 127]), which is [-32512, 32512]. log2(32512) - // = 14.98, which means we can accumulate at least 2^16 - // multiplications without overflow. The accumulator is - // applied to a filter so the accumulation logic will hold as - // long as the filter size (filter_y * filter_x * in_channel) - // does not exceed 2^16, which is the case in all the models - // we have seen so far. - // TODO(jianlijianli): Add a check to make sure the - // accumulator depth is smaller than 2^16. - acc += filter_val * (input_val + input_offset); + if (is_asymmetric) + { + const int32_t filter_offset = -filter_zeropoint[out_channel]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + else + { + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. + // TODO(jianlijianli): Add a check to make sure the + // accumulator depth is smaller than 2^16. 
+ acc += filter_val * (input_val + input_offset); + UNUSED_RELEASE(filter_zeropoint); + } } } } @@ -292,8 +304,7 @@ inline void Conv(const ConvParams ¶ms, const int32_t *output_multiplier, acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(acc); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast(acc); } } } diff --git a/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h new file mode 100644 index 0000000..025e407 --- /dev/null +++ b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__ +#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ +namespace reference_integer_ops +{ +inline void DepthwiseConvPerChannel(const DepthwiseConvParams ¶ms, + const int32_t *output_multiplier, const int32_t *output_shift, + const Shape &input_shape, const uint8_t *input_data, + const Shape &filter_shape, const uint8_t *filter_data, + const int32_t *filter_zeropoint, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, + uint8_t *output_data) +{ + // Get parameters. + // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Check dimensions of the tensors. 
+ assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + assert(output_activation_min <= output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + UNUSED_RELEASE(output_depth); + UNUSED_RELEASE(bias_shape); + assert(output_depth == input_depth * depth_multiplier); + assert(bias_shape.FlatSize() == output_depth); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + for (int m = 0; m < depth_multiplier; ++m) + { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. 
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + if (is_point_inside_image) + { + uint8_t input_val = + input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; + uint8_t filter_val = + filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)]; + + // { for per-channel + // NOTE: The following comment is copied from tflite int8 implementation + // It may not be 100% true for uint8 per-channel. + // + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8, even though it + // is represented using int32_t. + // int32 += int8 * (int8 - int8) so the highest value we can + // get from each accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. + // TODO(jianlijianli): Add a check to make sure the + // accumulator depth is smaller than 2^16. 
+ const int32_t filter_offset = -filter_zeropoint[output_channel]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + // } for per-channel + } + } + } + if (bias_data) + { + acc += bias_data[output_channel]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel], + output_shift[output_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + // For q8u per-channel, int8_t -> uint8_t + output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] = + static_cast(acc); + } + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__ diff --git a/compute/cker/include/cker/ruy/RuySupport.h b/compute/cker/include/cker/ruy/RuySupport.h index 62eeaf6..14489a8 100644 --- a/compute/cker/include/cker/ruy/RuySupport.h +++ b/compute/cker/include/cker/ruy/RuySupport.h @@ -64,23 +64,35 @@ void MakeRuyMatrix(const MatrixParams ¶ms, DataPointer data_ptr, } } -template -void MakeRuyMulParams(const GemmParamsType ¶ms, RuySpecType *ruy_mul_params) +// Integer-quantized case with destination type narrower than int32 +template +void MakeRuyMulParams(const GemmParams ¶ms, + ruy::MulParams *ruy_mul_params) { - // This validation has already been performed by the Gemm API entry point, - // but it doesn't hurt to test specifically this again here, where it's - // being used. 
- ValidateGemmParams(params); - - ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint); - ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent); - ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel); - ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel); + static_assert(sizeof(DstScalar) < sizeof(std::int32_t), ""); + if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier) + { + ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint); + ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent); + } + if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier) + { + ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel); + ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel); + } ruy_mul_params->set_bias(params.bias); ruy_mul_params->set_clamp_min(params.clamp_min); ruy_mul_params->set_clamp_max(params.clamp_max); } +// Raw-integer case with destination type int32. +template +void MakeRuyMulParams(const GemmParams ¶ms, + ruy::MulParams *ruy_mul_params) +{ + ruy_mul_params->set_bias(params.bias); +} + } // namespace ruy_support } // namespace cker } // namespace nnfw diff --git a/compute/test/cker/Range.cc b/compute/cker/src/Range.test.cc similarity index 100% rename from compute/test/cker/Range.cc rename to compute/cker/src/Range.test.cc diff --git a/compute/ruy/include/ruy/RuySupport.h b/compute/ruy/include/ruy/RuySupport.h index 7086a96..2f9ed74 100644 --- a/compute/ruy/include/ruy/RuySupport.h +++ b/compute/ruy/include/ruy/RuySupport.h @@ -64,23 +64,46 @@ void MakeRuyMatrix(const MatrixParams ¶ms, DataPointer data_ptr, } } -template -void MakeRuyMulParams(const GemmParamsType ¶ms, RuySpecType *ruy_mul_params) +// Floating-point case. 
+template +void MakeRuyMulParams(const GemmParams ¶ms, + ::ruy::MulParams *ruy_mul_params) { - // This validation has already been performed by the Gemm API entry point, - // but it doesn't hurt to test specifically this again here, where it's - // being used. - ValidateGemmParams(params); + static_assert(quantization_flavor == QuantizationFlavor::kFloatingPoint, ""); + ruy_mul_params->set_bias(params.bias); + ruy_mul_params->set_clamp_min(params.clamp_min); + ruy_mul_params->set_clamp_max(params.clamp_max); +} - ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint); - ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent); - ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel); - ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel); +// Integer-quantized case with destination type narrower than int32 +template +void MakeRuyMulParams(const GemmParams ¶ms, + ::ruy::MulParams *ruy_mul_params) +{ + static_assert(sizeof(DstScalar) < sizeof(std::int32_t), ""); + if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier) + { + ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint); + ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent); + } + if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier) + { + ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel); + ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel); + } ruy_mul_params->set_bias(params.bias); ruy_mul_params->set_clamp_min(params.clamp_min); ruy_mul_params->set_clamp_max(params.clamp_max); } +// Raw-integer case with destination type int32. 
+template +void MakeRuyMulParams(const GemmParams ¶ms, + ::ruy::MulParams *ruy_mul_params) +{ + ruy_mul_params->set_bias(params.bias); +} + } // namespace ruy_support } // namespace ruy } // namespace nnfw diff --git a/compute/ruy/include/ruy/operation/Conv.h b/compute/ruy/include/ruy/operation/Conv.h index 2b9c8c3..3f03694 100644 --- a/compute/ruy/include/ruy/operation/Conv.h +++ b/compute/ruy/include/ruy/operation/Conv.h @@ -169,7 +169,7 @@ private: ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true); ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst); - ::ruy::BasicSpec ruy_mul_params; + ::ruy::MulParams ruy_mul_params; ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params); ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst); diff --git a/compute/ruy/include/ruy/operation/FullyConnected.h b/compute/ruy/include/ruy/operation/FullyConnected.h index 59facdb..1d686b6 100644 --- a/compute/ruy/include/ruy/operation/FullyConnected.h +++ b/compute/ruy/include/ruy/operation/FullyConnected.h @@ -68,7 +68,7 @@ inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &inpu ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true); ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst); - ::ruy::BasicSpec ruy_mul_params; + ::ruy::MulParams ruy_mul_params; ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params); ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst); diff --git a/compute/test/CMakeLists.txt b/compute/test/CMakeLists.txt deleted file mode 100644 index 92aac3e..0000000 --- a/compute/test/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -if(NOT ENABLE_TEST) - return() -endif(NOT ENABLE_TEST) - -set(TEST_COMPUTE test_compute) - -file(GLOB_RECURSE TESTS "*.cc") - -add_executable(${TEST_COMPUTE} ${TESTS}) - -target_link_libraries(${TEST_COMPUTE} nnfw_lib_cker) -target_link_libraries(${TEST_COMPUTE} gtest) -target_link_libraries(${TEST_COMPUTE} gtest_main) 
-target_link_libraries(${TEST_COMPUTE} ${LIB_PTHREAD} dl) -add_test(${TEST_COMPUTE} ${TEST_COMPUTE}) - -install(TARGETS ${TEST_COMPUTE} DESTINATION unittest_standalone) diff --git a/docs/conf.py b/docs/conf.py index 84197e6..409e5f7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors' author = 'Samsung Research & contributors' # The full version, including alpha/beta/rc tags -release = '1.20.0' +release = '1.21.0' # -- General configuration --------------------------------------------------- diff --git a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md index 1f8c0c2..57b2b78 100644 --- a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md +++ b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md @@ -174,34 +174,26 @@ $ vi j2/etc/systemd/system/ip.service and set as like: ``` [Service] -Type=simple Restart=always RestartSec=1 User=root -ExecStart=/bin/sh /bin/ip.sh +ExecStart=/bin/sh -c "ifconfig eth0 192.168.x.y netmask 255.255.255.0 up" [Install] WantedBy=multi-user.target ``` +Replace 192.168.x.y with your actual IP address. -(5-3) Add a new file -``` -$ vi j2/bin/ip.sh -``` -and set with IP address for your RPi4: -``` -ifconfig eth0 192.168.x.y netmask 255.255.255.0 up -``` -where you should update `192.168.x.y` part to your actual IP address. -(5-4) Add a symbolic link +(5-3) Add a symbolic link ``` +$ sudo mkdir -p j2/etc/systemd/system/multi-user.target.wants/ $ pushd j2/etc/systemd/system/multi-user.target.wants/ $ sudo ln -s ../../system/ip.service . $ popd ``` -(5-5) Now that every thing is ready, unmount and unplug your memory card and plug into +(5-4) Now that everything is ready, unmount and unplug your memory card and plug into RPi4, turn on the power. ``` $ sync diff --git a/docs/release/1.20/index.rst b/docs/release/1.20/index.rst new file mode 100644 index 0000000..082d867 --- /dev/null +++ b/docs/release/1.20/index.rst @@ -0,0 +1,13 @@ +.. 
ONE documentation master file, created by + sphinx-quickstart on Tue Apr 26 10:18:12 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.20 +==== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.20.0.md diff --git a/docs/release/1.20/release-note-1.20.0.md b/docs/release/1.20/release-note-1.20.0.md new file mode 100644 index 0000000..2c75e06 --- /dev/null +++ b/docs/release/1.20/release-note-1.20.0.md @@ -0,0 +1,34 @@ +# Release Note 1.20.0 + +## ONE Compiler + +### Compiler Frontend + +- luci-interpreter supports multiple kernels with PAL layer including Cortex-M +- luci-interpreter supports integer tensor for some kernels +- luci import supports constants without copying to reduce memory for luci-interpreter +- Reduce duplicate codes to package released modules +- Limited support for ONNX LSTM/RNN unrolling while importing +- Limited support for ARM32 cross build +- Support new operator: SVDF +- New virtual CircleVariable to support tensor with variable +- Support quantization of BatchMatMul Op +- Support mixed(UINT8 + INT16) quantization +- Support backward propagation of quantization parameters +- Upgrade default python to version 3.8 +- Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0 +- Upgrade circle schema to follow tflite schema v3b +- Refactor to mio-tflite280, mio-circle04 with version and helpers methods +- Use one flatbuffers 2.0 version +- Drop support for TensorFlow 1.x +- Fix for several bugs, performance enhancements, and typos + +## ONE Runtime + +### Introduce TRIX backend +- TRIX backend supports trix binary with NHWC layout +- TRIX backend supports trix binary with input/output of Q8 and Q16 type + +### API supports new data type +- Symmetric Quantized int16 type named "NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED" + diff --git a/docs/release/1.21/index.rst b/docs/release/1.21/index.rst new file mode 100644 index 0000000..587065f --- /dev/null +++ 
b/docs/release/1.21/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Wed Sep 06 12:18:12 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.21 +==== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note_1.21.0.md diff --git a/docs/release/1.21/release-note_1.21.0.md b/docs/release/1.21/release-note_1.21.0.md new file mode 100644 index 0000000..49bf074 --- /dev/null +++ b/docs/release/1.21/release-note_1.21.0.md @@ -0,0 +1,35 @@ +# Release Note 1.21.0 + +## ONE Compiler + +- Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool +- Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter` +- Introduced `Workflow`(WIP) in `one-cmds` +- New option `quant_config` in `one-quantize` +- New option `fake_quantize` in `one-quantize` +- More Ops supported: Densify +- More Ops for quantization: ReduceMax +- More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze +- More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax +- New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify` +- Improved reshape elimination in `convert_nchw_to_nhwc` option. +- Support fusion of Channel-wise Add + Relu with TConv +- Support negative axis in ArgMin/Max +- Show errors for unrecognized options in `one-optimize` +- Fix shape inference for `StridedSlice` +- Fix FuseBatchNormWithTConvPass to support TConv with bias +- Deprecate `--O1` option in `circle2circle` +- Support gcc-11 +- Support limited Float16 for kernels constants with dequantization to Float32 + +## ONE Runtime + +### Basic Multimodel nnpackage +- Runtime supports running nnpackage with two models + +### Channel Wise Quantization on Conv2D and Depthwise Conv2D +- Conv2D and Depthwise Conv2D support per-channel quantization of uint8 type. 
+ +### Batch Execution with TRIX backend +- TRIX backend supports batch execution which run in parallel with multicore + diff --git a/infra/cmake/modules/IdentifyPlatform.cmake b/infra/cmake/modules/IdentifyPlatform.cmake index 6616283..890055f 100644 --- a/infra/cmake/modules/IdentifyPlatform.cmake +++ b/infra/cmake/modules/IdentifyPlatform.cmake @@ -35,6 +35,8 @@ endif() if("${HOST_ARCH}" STREQUAL "x86_64") set(HOST_ARCH_BASE ${HOST_ARCH}) +elseif("${HOST_ARCH}" STREQUAL "armv7em") + set(HOST_ARCH_BASE "arm") elseif("${HOST_ARCH}" STREQUAL "armv7l") set(HOST_ARCH_BASE "arm") elseif("${HOST_ARCH}" STREQUAL "armv7hl") @@ -49,6 +51,8 @@ endif() if("${TARGET_ARCH}" STREQUAL "x86_64") set(TARGET_ARCH_BASE ${TARGET_ARCH}) +elseif("${TARGET_ARCH}" STREQUAL "armv7em") + set(TARGET_ARCH_BASE "arm") elseif("${TARGET_ARCH}" STREQUAL "armv7l") set(TARGET_ARCH_BASE "arm") elseif("${TARGET_ARCH}" STREQUAL "armv7hl") diff --git a/infra/cmake/packages/AbseilConfig.cmake b/infra/cmake/packages/AbseilConfig.cmake index 6fae721..b3cb364 100644 --- a/infra/cmake/packages/AbseilConfig.cmake +++ b/infra/cmake/packages/AbseilConfig.cmake @@ -12,11 +12,18 @@ function(_Abseil_import) # NOTE Turn off abseil testing set(BUILD_TESTING OFF) + # Set -fPIC property because Abseil-cpp can be used for shared library + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + # Abseil-cpp 20211102.0 show warning without below setting + set(ABSL_PROPAGATE_CXX_STD ON) + add_extdirectory("${AbseilSource_DIR}" ABSEIL) add_library(abseil INTERFACE) + target_link_libraries(abseil INTERFACE # From "Available Abseil CMake Public Targets" in CMake/README.md + # Add absl::status (It is not listed in CMake/README.md) absl::algorithm absl::base absl::debugging @@ -27,19 +34,14 @@ function(_Abseil_import) absl::numeric absl::random_random absl::strings - absl::status absl::synchronization absl::time absl::utility + absl::status ) endif(NOT TARGET abseil) set(Abseil_FOUND TRUE PARENT_SCOPE) endfunction(_Abseil_import) 
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fPIC") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fPIC") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fPIC") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC") - _Abseil_import() diff --git a/infra/cmake/packages/AbseilSourceConfig.cmake b/infra/cmake/packages/AbseilSourceConfig.cmake index 8aeb86d..0297c08 100644 --- a/infra/cmake/packages/AbseilSourceConfig.cmake +++ b/infra/cmake/packages/AbseilSourceConfig.cmake @@ -7,14 +7,13 @@ function(_AbseilSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - # NOTE TensorFlow 2.3 downloads abseil from the following URL + # NOTE TensorFlow 2.9 downloads abseil 20211102.0 envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") - envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz) - + envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20211102.0.tar.gz) ExternalSource_Download(ABSEIL DIRNAME ABSEIL URL ${ABSEIL_URL} - CHECKSUM MD5=4d9aa7e757adf48fef171c85f0d88552) + CHECKSUM MD5=bdca561519192543378b7cade101ec43) set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE) set(AbseilSource_FOUND TRUE PARENT_SCOPE) diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake index 99118c5..d1588d3 100644 --- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake +++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake @@ -2,7 +2,8 @@ function(_CMSISSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(CMSIS_5_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz) set(CMSIS_5_8_0_SHA256 
fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad) ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL} diff --git a/infra/cmake/packages/CaffeSourceConfig.cmake b/infra/cmake/packages/CaffeSourceConfig.cmake index 41cc2c9..05eb5b3 100644 --- a/infra/cmake/packages/CaffeSourceConfig.cmake +++ b/infra/cmake/packages/CaffeSourceConfig.cmake @@ -7,7 +7,8 @@ function(_CaffeSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(CAFFE_URL https://github.com/BVLC/caffe/archive/1.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(CAFFE_URL ${EXTERNAL_DOWNLOAD_SERVER}/BVLC/caffe/archive/1.0.tar.gz) ExternalSource_Download(CAFFE ${CAFFE_URL}) diff --git a/infra/cmake/packages/CpuInfoSourceConfig.cmake b/infra/cmake/packages/CpuInfoSourceConfig.cmake index 60419ad..b93a6a2 100644 --- a/infra/cmake/packages/CpuInfoSourceConfig.cmake +++ b/infra/cmake/packages/CpuInfoSourceConfig.cmake @@ -8,8 +8,8 @@ function(_CpuInfoSource_import) nnas_include(OptionTools) envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") - # CPUINFO commit including patch from tflite v2.3 - envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/63b254577ed77a8004a9be6ac707f3dccc4e1fd9.tar.gz) + # CPUINFO commit from tflite v2.8 + envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.tar.gz) ExternalSource_Download(CPUINFO DIRNAME CPUINFO URL ${CPUINFO_URL}) diff --git a/infra/cmake/packages/Egl_HeadersSourceConfig.cmake b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake new file mode 100644 index 0000000..fae57f6 --- /dev/null +++ b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_Egl_HeadersSource_import) + if(NOT DOWNLOAD_EGL_HEADERS) + set(Egl_HeadersSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_EGL_HEADERS) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + 
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(EGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/EGL-Registry/archive/649981109e263b737e7735933c90626c29a306f2.zip) + + ExternalSource_Download(EGL_HEADERS + DIRNAME EGL_HEADERS + URL ${EGL_HEADERS_URL}) + + set(Egl_HeadersSource_DIR ${EGL_HEADERS_SOURCE_DIR} PARENT_SCOPE) + set(Egl_HeadersSource_FOUND TRUE PARENT_SCOPE) +endfunction(_Egl_HeadersSource_import) + +_Egl_HeadersSource_import() diff --git a/infra/cmake/packages/FarmhashSourceConfig.cmake b/infra/cmake/packages/FarmhashSourceConfig.cmake index a19c8b9..fa1867c 100644 --- a/infra/cmake/packages/FarmhashSourceConfig.cmake +++ b/infra/cmake/packages/FarmhashSourceConfig.cmake @@ -10,7 +10,8 @@ function(_FarmhashSource_import) # NOTE TensorFlow 1.12 downloads farmhash from the following URL # TensorFlow 1.13.1 downloads farmhash from the following URL # TensorFlow 2.3.0 downloads farmhash from the following URL - envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(FARMHASH_1_12_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz) ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL}) diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake index a0a32aa..e094055 100644 --- a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake +++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake @@ -7,7 +7,8 @@ function(_FlatBuffersSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(FLATBUFFERS_2_0_URL https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(FLATBUFFERS_2_0_URL 
${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/v2.0.0.tar.gz) ExternalSource_Download(FLATBUFFERS DIRNAME FLATBUFFERS-2.0 CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd diff --git a/infra/cmake/packages/Fp16SourceConfig.cmake b/infra/cmake/packages/Fp16SourceConfig.cmake index 3623fd2..3df4e4c 100644 --- a/infra/cmake/packages/Fp16SourceConfig.cmake +++ b/infra/cmake/packages/Fp16SourceConfig.cmake @@ -9,7 +9,7 @@ function(_Fp16Source_import) envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") # fp16 commit in xnnpack 8b283aa30a31 - envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.tar.gz) + envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.tar.gz) ExternalSource_Download(FP16 DIRNAME FP16 URL ${FP16_URL}) diff --git a/infra/cmake/packages/GEMMLowpSourceConfig.cmake b/infra/cmake/packages/GEMMLowpSourceConfig.cmake index 6e1cfa9..3b35603 100644 --- a/infra/cmake/packages/GEMMLowpSourceConfig.cmake +++ b/infra/cmake/packages/GEMMLowpSourceConfig.cmake @@ -9,7 +9,8 @@ function(_GEMMLowpSource_import) # NOTE TensorFlow 1.12 uses the following URL # TensorFlow 1.13.1 uses the following URL - envoption(GEMMLOWP_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz) ExternalSource_Download(GEMMLOWP ${GEMMLOWP_URL}) diff --git a/infra/cmake/packages/GFlagsSourceConfig.cmake b/infra/cmake/packages/GFlagsSourceConfig.cmake index 3e70d89..2f9b753 100644 --- a/infra/cmake/packages/GFlagsSourceConfig.cmake +++ b/infra/cmake/packages/GFlagsSourceConfig.cmake @@ -7,7 +7,8 @@ function(_GFlagsSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(GFLAGS_URL 
https://github.com/gflags/gflags/archive/v2.2.1.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(GFLAGS_URL ${EXTERNAL_DOWNLOAD_SERVER}/gflags/gflags/archive/v2.2.1.tar.gz) ExternalSource_Download(GFLAGS ${GFLAGS_URL}) diff --git a/infra/cmake/packages/GTestSourceConfig.cmake b/infra/cmake/packages/GTestSourceConfig.cmake index e57d096..643c3d1 100644 --- a/infra/cmake/packages/GTestSourceConfig.cmake +++ b/infra/cmake/packages/GTestSourceConfig.cmake @@ -7,7 +7,8 @@ function(_GTestSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(GTEST_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/googletest/archive/release-1.11.0.tar.gz) ExternalSource_Download(GTEST ${GTEST_URL}) diff --git a/infra/cmake/packages/HDF5SourceConfig.cmake b/infra/cmake/packages/HDF5SourceConfig.cmake index 9db048c..3440dbd 100644 --- a/infra/cmake/packages/HDF5SourceConfig.cmake +++ b/infra/cmake/packages/HDF5SourceConfig.cmake @@ -7,7 +7,8 @@ function(_HDF5Source_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(HDF5_URL ${EXTERNAL_DOWNLOAD_SERVER}/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz) ExternalSource_Download(HDF5 ${HDF5_URL} PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch) diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake index 3195ea4..8d67285 100644 --- a/infra/cmake/packages/JsoncppSourceConfig.cmake +++ b/infra/cmake/packages/JsoncppSourceConfig.cmake @@ -7,7 +7,8 @@ function(_JsoncppSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(JSONCPP_URL 
https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(JSONCPP_URL ${EXTERNAL_DOWNLOAD_SERVER}/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz) ExternalSource_Download(JSONCPP ${JSONCPP_URL}) diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake index 8055545..e55647d 100644 --- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake +++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake @@ -2,7 +2,8 @@ function(_MbedOSSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(MBEDOS_6_15_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz) set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e) ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL} diff --git a/infra/cmake/packages/NEON2SSESourceConfig.cmake b/infra/cmake/packages/NEON2SSESourceConfig.cmake index bd40267..82c71e2 100644 --- a/infra/cmake/packages/NEON2SSESourceConfig.cmake +++ b/infra/cmake/packages/NEON2SSESourceConfig.cmake @@ -8,10 +8,10 @@ function(_NEON2SSESource_import) nnas_include(OptionTools) # NOTE TensorFlow 1.13.1 downloads NEON2SSE from the following URL - # NOTE TensorFlow 2.1 downloads NEON2SSE from the following URL - # NOTE TensorFlow 2.2 downloads NEON2SSE from the following URL - # NOTE TensorFlow 2.3 downloads NEON2SSE from the following URL - envoption(NEON2SSE_URL https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz) + # NOTE TensorFlow 2.8.0 downloads NEON2SSE from the following URL + # NOTE commit c12f8932c3be5aebaf35562d699f645686c4e2c3 will 
resolve build fail on debug build + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz) ExternalSource_Download(NEON2SSE ${NEON2SSE_URL}) diff --git a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake index c9fb5e4..fe21f6d 100644 --- a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake +++ b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake @@ -7,7 +7,8 @@ function(_ONNXSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(ONNX_1_4_1_URL https://github.com/onnx/onnx/archive/v1.4.1.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(ONNX_1_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.4.1.zip) ExternalSource_Download(ONNX DIRNAME ONNX-1.4.1 CHECKSUM MD5=604b43a22fbc758f32ae9f3a4fb9d397 diff --git a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake index ef903f8..b2ad08b 100644 --- a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake +++ b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake @@ -7,7 +7,8 @@ function(_ONNXSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(ONNX_1_6_0_URL https://github.com/onnx/onnx/archive/v1.6.0.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(ONNX_1_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.6.0.zip) ExternalSource_Download(ONNX DIRNAME ONNX-1.6.0 CHECKSUM MD5=cbdc547a527f1b59c7f066c8d258b966 diff --git a/infra/cmake/packages/OouraFFTSourceConfig.cmake b/infra/cmake/packages/OouraFFTSourceConfig.cmake index be551fb..d84b5b2 100644 --- a/infra/cmake/packages/OouraFFTSourceConfig.cmake +++ b/infra/cmake/packages/OouraFFTSourceConfig.cmake @@ -8,7 +8,8 @@ 
function(_OouraFFTSource_import) nnas_include(OptionTools) # NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL - envoption(OOURAFFT_URL https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(OOURAFFT_URL ${EXTERNAL_DOWNLOAD_SERVER}/petewarden/OouraFFT/archive/v1.0.tar.gz) ExternalSource_Download(OOURAFFT ${OOURAFFT_URL}) diff --git a/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake new file mode 100644 index 0000000..c5a774a --- /dev/null +++ b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_Opengl_HeadersSource_import) + if(NOT DOWNLOAD_OPENGL_HEADERS) + set(Opengl_HeadersSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_OPENGL_HEADERS) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(OPENGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/OpenGL-Registry/archive/0cb0880d91581d34f96899c86fc1bf35627b4b81.zip) + + ExternalSource_Download(OPENGL_HEADERS + DIRNAME OPENGL_HEADERS + URL ${OPENGL_HEADERS_URL}) + + set(Opengl_HeadersSource_DIR ${OPENGL_HEADERS_SOURCE_DIR} PARENT_SCOPE) + set(Opengl_HeadersSource_FOUND TRUE PARENT_SCOPE) +endfunction(_Opengl_HeadersSource_import) + +_Opengl_HeadersSource_import() diff --git a/infra/cmake/packages/ProtobufSourceConfig.cmake b/infra/cmake/packages/ProtobufSourceConfig.cmake index baa49ee..a1704e5 100644 --- a/infra/cmake/packages/ProtobufSourceConfig.cmake +++ b/infra/cmake/packages/ProtobufSourceConfig.cmake @@ -7,7 +7,8 @@ function(_ProtobufSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(PROTOBUF_URL 
${EXTERNAL_DOWNLOAD_SERVER}/protocolbuffers/protobuf/archive/v3.5.2.tar.gz) ExternalSource_Download(PROTOBUF ${PROTOBUF_URL} PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch) diff --git a/infra/cmake/packages/Pybind11SourceConfig.cmake b/infra/cmake/packages/Pybind11SourceConfig.cmake index 76f51e4..2f64253 100644 --- a/infra/cmake/packages/Pybind11SourceConfig.cmake +++ b/infra/cmake/packages/Pybind11SourceConfig.cmake @@ -7,7 +7,8 @@ function(_Pybind11Source_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(PYBIND11_URL ${EXTERNAL_DOWNLOAD_SERVER}/pybind/pybind11/archive/v2.5.0.tar.gz) ExternalSource_Download(PYBIND11 ${PYBIND11_URL}) diff --git a/infra/cmake/packages/PytorchSourceConfig.cmake b/infra/cmake/packages/PytorchSourceConfig.cmake index 0212f2f..94757f8 100644 --- a/infra/cmake/packages/PytorchSourceConfig.cmake +++ b/infra/cmake/packages/PytorchSourceConfig.cmake @@ -7,7 +7,8 @@ function(_PytorchSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(PYTORCH_URL https://github.com/pytorch/pytorch/archive/v0.4.1.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(PYTORCH_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/pytorch/archive/v0.4.1.tar.gz) ExternalSource_Download(PYTORCH ${PYTORCH_URL}) diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake index f846755..8120ebc 100644 --- a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowEigenSource_import) # Exact version used by TensorFlow v2.1.0. # See tensorflow/tensorflow/workspace.bzl. 
- envoption(TENSORFLOW_2_1_0_EIGEN_URL https://gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com") + envoption(TENSORFLOW_2_1_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz) ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.1.0-EIGEN ${TENSORFLOW_2_1_0_EIGEN_URL}) diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake new file mode 100644 index 0000000..6f59f07 --- /dev/null +++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_TensorFlowEigenSource_import) + if(NOT DOWNLOAD_EIGEN) + set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_EIGEN) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + # Exact version used by TensorFlow v2.8.0. + # See tensorflow/third_party/eigen3/workspace.bzl. 
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com") + envoption(TENSORFLOW_2_8_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/008ff3483a8c5604639e1c4d204eae30ad737af6/eigen-e1dd31ce174c3d26fbe38388f64b09d2adbd7557a59e90e6f545a288cc1755fc.tar.gz) + + ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.8.0-EIGEN ${TENSORFLOW_2_8_0_EIGEN_URL}) + + set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE) + set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowEigenSource_import) + +_TensorFlowEigenSource_import() diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake new file mode 100644 index 0000000..2ad2e24 --- /dev/null +++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.8.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake index 035264f..421be6c 100644 --- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import) # Exact version used by TensorFlow v2.1.0. # See tensorflow/tensorflow/workspace.bzl. 
- envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip) ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.1.0-GEMMLOWP ${TENSORFLOW_2_1_0_GEMMLOWP_URL}) diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake index bc13d62..44c56a6 100644 --- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import) # Exact version used by TensorFlow v2.3.0. # See tensorflow/tensorflow/workspace.bzl. - envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL}) diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake index b7f3148..76cdfdd 100644 --- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import) # Exact version used by TensorFlow v2.6.0. # See tensorflow/third_party/gemmlowp/workspace.bzl. 
- envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL}) diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake index f3663cc..3e17490 100644 --- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import) # Exact version used by TensorFlow v2.8.0. # See tensorflow/third_party/gemmlowp/workspace.bzl. - envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL}) diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake index f1debe7..369816a 100644 --- a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake @@ -13,7 +13,7 @@ function(_TensorFlowGpuSource_Import) set(PATCH_DONE "TRUE") endif() endif() - + if(${PATCH_DONE} STREQUAL "TRUE") message(STATUS "Skip downloading TensorFlowGpuSource") set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE) @@ -28,7 +28,8 @@ 
function(_TensorFlowGpuSource_Import) # Download TFLite Source Code nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_4_1_URL https://github.com/tensorflow/tensorflow/archive/v2.4.1.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.4.1.tar.gz) ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL}) # Patch for non used codes on onert backend/gpu_cl diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake index 3dbf05e..3a7dc89 100644 --- a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import) # Exact version used by TensorFlow v2.3.0. # See tensorflow/third_party/ruy/workspace.bzl - envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_3_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip) ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL}) diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake index b4dee91..e4dd4f2 100644 --- a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake @@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import) # Exact version used by TensorFlow v2.6.0. 
# See tensorflow/third_party/ruy/workspace.bzl - envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_6_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip) ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL}) diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake new file mode 100644 index 0000000..2ead7cd --- /dev/null +++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake @@ -0,0 +1,21 @@ +function(_TensorFlowRuySource_import) + if(NOT DOWNLOAD_RUY) + set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_RUY) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + # Exact version used by TensorFlow v2.8.0. 
+ # See tensorflow/third_party/ruy/workspace.bzl + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_8_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip) + + ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.8.0-RUY ${TENSORFLOW_2_8_0_RUY_URL}) + + set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE) + set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowRuySource_import) + +_TensorFlowRuySource_import() diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake new file mode 100644 index 0000000..2ad2e24 --- /dev/null +++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.8.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake index bcdf9f2..33538c2 100644 --- a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_1_14_URL https://github.com/tensorflow/tensorflow/archive/v1.14.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_1_14_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.14.0.tar.gz) ExternalSource_Download(TENSORFLOW 
DIRNAME TENSORFLOW-1.14 ${TENSORFLOW_1_14_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake index 0d2a950..aabc22f 100644 --- a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_1_0_URL https://github.com/tensorflow/tensorflow/archive/v2.1.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_1_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.1.0.tar.gz) ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.1.0 ${TENSORFLOW_2_1_0_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake index 71220d7..7dabf88 100644 --- a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_2_0_URL https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.2.0.tar.gz) ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.2.0 ${TENSORFLOW_2_2_0_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake index 82df579..967d49e 100644 --- a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake @@ -10,7 +10,8 @@ function(_import) 
nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_3_0_RC0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_3_0_RC0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz) ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0-RC0 ${TENSORFLOW_2_3_0_RC0_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake index 5c3a0f8..0ad0cda 100644 --- a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_3_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0.tar.gz) ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake index 611c7c8..9a7af17 100644 --- a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.6.0.tar.gz) ExternalSource_Download(TENSORFLOW 
DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL}) diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake index 4abe2ea..988a0f4 100644 --- a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake +++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake @@ -7,7 +7,8 @@ function(_TensorFlowSource_import) nnas_include(ExternalSourceTools) nnas_include(OptionTools) - envoption(TENSORFLOW_2_8_0_URL https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz) + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(TENSORFLOW_2_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.8.0.tar.gz) ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL}) diff --git a/infra/cmake/packages/VulkanSourceConfig.cmake b/infra/cmake/packages/VulkanSourceConfig.cmake new file mode 100644 index 0000000..76b6989 --- /dev/null +++ b/infra/cmake/packages/VulkanSourceConfig.cmake @@ -0,0 +1,20 @@ +function(_VulkanSource_import) + if(NOT ${DOWNLOAD_VULKAN}) + set(VulkanSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_VULKAN}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + envoption(VULKAN_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/Vulkan-Headers/archive/ec2db85225ab410bc6829251bef6c578aaed5868.tar.gz) + ExternalSource_Download(VULKAN + DIRNAME VULKAN + URL ${VULKAN_URL}) + + set(VulkanSource_DIR ${VULKAN_SOURCE_DIR} PARENT_SCOPE) + set(VulkanSource_FOUND TRUE PARENT_SCOPE) +endfunction(_VulkanSource_import) + +_VulkanSource_import() diff --git a/infra/command/format b/infra/command/format index 5cf9606..993a6ad 100644 --- a/infra/command/format +++ b/infra/command/format @@ -154,11 +154,9 @@ function check_python_files() { fi # Check python files - FILES_TO_CHECK_PYTHON=`echo 
"$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'` + FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`) # Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv - FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK_PYTHON" | egrep -v '^compiler/one-cmds/.*\..*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'` - # Transform to array - FILES_TO_CHECK_PYTHON=($FILES_TO_CHECK_PYTHON) + FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`) for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do skip=${s#'.'/}/ diff --git a/infra/command/gen-coverage-report b/infra/command/gen-coverage-report index 3058aee..df6377d 100644 --- a/infra/command/gen-coverage-report +++ b/infra/command/gen-coverage-report @@ -69,10 +69,10 @@ done opencl_files=($(find ./runtime/onert/backend/gpu_cl/open_cl/ \( -name "*.cc" -o -name "*.h" \) -exec realpath {} \; )) -# Exclude *.test.cpp files from coverage report +# Exclude test files from coverage report # Exclude flatbuffer generated files from coverage report "${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \ - '*.test.cpp' '*_schema_generated.h' "${opencl_files[@]}" + '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h' "${opencl_files[@]}" # Final coverage data cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH} diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog index 2763ac5..ddca70a 100644 --- a/infra/debian/compiler/changelog +++ b/infra/debian/compiler/changelog @@ -1,3 +1,50 @@ +one (1.21.0) bionic; urgency=medium + + * Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool + * Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter` + * Introduced `Workflow`(WIP) in `one-cmds` + * New option `quant_config` in `one-quantize` + * 
New option `fake_quantize` in `one-quantize` + * More Ops supported: Densify + * More Ops for quantization: ReduceMax + * More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze + * More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax + * New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify` + * Improved reshape elimination in `convert_nchw_to_nhwc` option. + * Support fusion of Channel-wise Add + Relu with TConv + * Support negative axis in ArgMin/Max + * Show errors for unrecognized options in `one-optimize` + * Fix shape inference for `StridedSlice` + * Fix FuseBatchNormWithTConvPass to support TConv with bias + * Deprecate `--O1` option in `circle2circle` + * Support gcc-11 + * Support limited Float16 for kernels constants with dequantization to Float32 + + -- seongwoo Tue, 06 Sep 2022 12:00:00 +0900 + +one (1.20.0) bionic; urgency=medium + + * luci-interpreter supports multiple kernels with PAL layer including Cortex-M + * luci-interpreter supports integer tensor for partly kernels + * luci import support constant without copying to reduce memory for luci-interpreter + * Reduce duplicate codes to package released modules + * Limited support for ONNX LSTM/RNN unrolling while importing + * Limited support for ARM32 cross build + * Support new operator: SVDF + * New virtual CircleVariable to support tensor with variable + * Support quantization of BatchMatMul Op + * Support mixed(UINT8 + INT16) quantization + * Support backward propagation of quantization parameters + * Upgrade default python to version 3.8 + * Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0 + * Upgrade circle schema to follow tflite schema v3b + * Refactor to mio-tflite280, mio-circle04 with version and helpers methods + * Use one flatbuffers 2.0 version + * Drop support for TensorFlow 1.x + * Fix for several bugs, performance enhancements, and typos + + -- seongwoo Tue,
26 Apr 2022 12:00:00 +0900 + one (1.19.0) bionic; urgency=medium * `circle-quantizer` supports input/output type option diff --git a/infra/debian/compiler/docs/one-infer.1 b/infra/debian/compiler/docs/one-infer.1 new file mode 100644 index 0000000..a1bafbb --- /dev/null +++ b/infra/debian/compiler/docs/one-infer.1 @@ -0,0 +1,46 @@ +.TH ONE-INFER "1" "July 2022" "one-infer version 1.21.0" "User Commands" +.SH NAME +one-infer \- manual page for one-infer version 1.21.0 +.SH DESCRIPTION +usage: one\-infer [\-h] [\-v] [\-C CONFIG] [\-d DRIVER | \fB\-b\fR BACKEND] [\-\-post\-process POST_PROCESS] [\-\-] [COMMANDS FOR BACKEND DRIVER] +.PP +command line tool to infer model +.SS "optional arguments:" +.TP +\fB\-h\fR, \fB\-\-help\fR +show this help message and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +show program's version number and exit +.TP +\fB\-V\fR, \fB\-\-verbose\fR +output additional information to stdout or stderr +.TP +\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG +run with configuration file +.TP +\fB\-d\fR DRIVER, \fB\-\-driver\fR DRIVER +backend inference driver name to execute +.TP +\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND +backend name to use +.TP +\fB\-\-post\-process\fR POST_PROCESS +post processing script to convert I/O data to standard +format +.SH COPYRIGHT +Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved +Licensed under the Apache License, Version 2.0 +https://github.com/Samsung/ONE +.SH "SEE ALSO" +The full documentation for +.B one-infer +is maintained as a Texinfo manual. If the +.B info +and +.B one-infer +programs are properly installed at your site, the command +.IP +.B info one-infer +.PP +should give you access to the complete manual. diff --git a/infra/debian/compiler/docs/one-partition.1 b/infra/debian/compiler/docs/one-partition.1 new file mode 100644 index 0000000..5b6fe93 --- /dev/null +++ b/infra/debian/compiler/docs/one-partition.1 @@ -0,0 +1,56 @@ +.\" DO NOT MODIFY THIS FILE!
It was generated by help2man 1.47.6. +.TH ONE-PARTITION "1" "June 2022" "one-partition version 1.21.0" "User Commands" +.SH NAME +one-partition \- manual page for one-partition version 1.21.0 +.SH DESCRIPTION +usage: one\-partition [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-backends BACKENDS] +.TP +[\-\-default DEFAULT] [\-\-part_file PART_FILE] +[\-\-input_file INPUT_FILE] [\-\-work_path WORK_PATH] +.PP +command line tool to partition circle model by multiple backends +.SS "optional arguments:" +.TP +\fB\-h\fR, \fB\-\-help\fR +show this help message and exit +.TP +\fB\-v\fR, \fB\-\-version\fR +show program's version number and exit +.TP +\fB\-V\fR, \fB\-\-verbose\fR +output additional information to stdout or stderr +.TP +\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG +run with configuration file +.TP +\fB\-\-backends\fR BACKENDS +backends in CSV to use for partitioning +.TP +\fB\-\-default\fR DEFAULT +default backend to assign +.TP +\fB\-\-part_file\fR PART_FILE +partition file which provides backend to assign +.TP +\fB\-\-input_file\fR INPUT_FILE +input circle model filename +.TP +\fB\-\-work_path\fR WORK_PATH +work path of partition, input files exist and output +files are produced +.SH COPYRIGHT +Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved +Licensed under the Apache License, Version 2.0 +https://github.com/Samsung/ONE +.SH "SEE ALSO" +The full documentation for +.B one-partition +is maintained as a Texinfo manual. If the +.B info +and +.B one-partition +programs are properly installed at your site, the command +.IP +.B info one-partition +.PP +should give you access to the complete manual.
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install index 805ba86..65e46d1 100644 --- a/infra/debian/compiler/one-compiler.install +++ b/infra/debian/compiler/one-compiler.install @@ -1,6 +1,8 @@ # {FILES_TO_INSTALL} {DEST_DIR} # bin usr/bin/circle2circle usr/share/one/bin/ +usr/bin/circle-eval-diff usr/share/one/bin/ +usr/bin/circle-operator usr/share/one/bin/ usr/bin/circle-partitioner usr/share/one/bin/ usr/bin/circle-quantizer usr/share/one/bin/ usr/bin/generate_bcq_metadata.py usr/share/one/bin/ @@ -16,14 +18,21 @@ usr/bin/one-import-bcq usr/share/one/bin/ usr/bin/one-import-onnx usr/share/one/bin/ usr/bin/one-import-tf usr/share/one/bin/ usr/bin/one-import-tflite usr/share/one/bin/ +usr/bin/one-infer usr/share/one/bin/ usr/bin/one-optimize usr/share/one/bin/ usr/bin/one-pack usr/share/one/bin/ +usr/bin/one-partition usr/share/one/bin/ usr/bin/one-prepare-venv usr/share/one/bin/ usr/bin/one-profile usr/share/one/bin/ usr/bin/one-quantize usr/share/one/bin/ usr/bin/one-version usr/share/one/bin/ usr/bin/onelib/constant.py usr/share/one/bin/onelib/ usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/ +usr/bin/onelib/CfgRunner.py usr/share/one/bin/onelib/ +usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/ +usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/ +usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/ +usr/bin/onnx_legalizer.py usr/share/one/bin/ usr/bin/rawdata2hdf5 usr/share/one/bin/ usr/bin/record-minmax usr/share/one/bin/ usr/bin/tf2nnpkg usr/share/one/bin/ diff --git a/infra/debian/compiler/one-compiler.manpages b/infra/debian/compiler/one-compiler.manpages index 77f2f4e..e0284ae 100644 --- a/infra/debian/compiler/one-compiler.manpages +++ b/infra/debian/compiler/one-compiler.manpages @@ -1,5 +1,6 @@ debian/docs/one-build.1 debian/docs/one-codegen.1 +debian/docs/one-infer.1 debian/docs/one-import.1 debian/docs/one-import-bcq.1 debian/docs/one-import-onnx.1 @@ -7,6 +8,7 @@ 
debian/docs/one-import-tf.1 debian/docs/one-import-tflite.1 debian/docs/one-optimize.1 debian/docs/one-pack.1 +debian/docs/one-partition.1 debian/docs/one-profile.1 debian/docs/one-quantize.1 debian/docs/onecc.1 diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog index 4cf0abc..e07c50c 100644 --- a/infra/debian/runtime/changelog +++ b/infra/debian/runtime/changelog @@ -1,3 +1,18 @@ +one (1.21.0) bionic; urgency=low + + * Runtime supports to run nnpackage with two models + * Conv2D and Depthwise Conv2D supports per-channel quantization of uint8 type. + * TRIX backend supports batch execution which run in parallel with multicore + + -- Chunseok Lee Tue, 06 Sep 2022 12:00:00 +0900 + +one (1.20.0) bionic; urgency=low + + * Introduce TRIX backend + * API supports new data type NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED + + -- Chunseok Lee Tue, 26 Apr 2022 12:00:00 +0900 + one (1.19.0) bionic; urgency=low * Synch up version with ONE Compiler diff --git a/infra/debian/runtime/rules b/infra/debian/runtime/rules index dee87a9..97170ee 100755 --- a/infra/debian/runtime/rules +++ b/infra/debian/runtime/rules @@ -3,7 +3,7 @@ DEBVER := $(shell dpkg-parsechangelog -SVersion) export DH_VERBOSE = 1 export _DESTDIR = debian/tmp/ export BUILD_TYPE=release -export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_3_0=0 -DBUILD_TENSORFLOW_LITE=0 +export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0 export DEBIAN_BUILD=1 export INSTALL_PATH=debian/tmp/usr/ %: diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile index
dbc22a6..f7ffc73 100644 --- a/infra/docker/bionic/Dockerfile +++ b/infra/docker/bionic/Dockerfile @@ -86,7 +86,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt RUN apt-get update && apt-get -qqy install gbs RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip RUN unzip -d tmp sdb.zip && rm sdb.zip -RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp +RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/* # Install java RUN apt-get install -y --no-install-recommends openjdk-8-jdk diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile index 6f3cd9b..1cdeffb 100644 --- a/infra/docker/focal/Dockerfile +++ b/infra/docker/focal/Dockerfile @@ -46,7 +46,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt RUN apt-get update && apt-get -qqy install gbs RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ubuntu-64.zip -O sdb.zip RUN unzip -d tmp sdb.zip && rm sdb.zip -RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp +RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/* # Clean archives (to reduce image size) RUN apt-get clean -y diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt index 2ff5a5f..768d797 100644 --- a/infra/nncc/CMakeLists.txt +++ b/infra/nncc/CMakeLists.txt @@ -1,4 +1,7 @@ -cmake_minimum_required(VERSION 3.1) +# The libboost 1.74 uses IN_LIST operator, which requires the policy CMP0057, in a CMake file. +# This policy requires ``cmake_minimum_required(VERSION 3.3)``. +# Run "cmake --help-policy CMP0057" for policy details. 
+cmake_minimum_required(VERSION 3.3) project(nncc) diff --git a/infra/nncc/cmake/options/options_armv7em-generic.cmake b/infra/nncc/cmake/options/options_armv7em-generic.cmake new file mode 100644 index 0000000..d671b73 --- /dev/null +++ b/infra/nncc/cmake/options/options_armv7em-generic.cmake @@ -0,0 +1,3 @@ +# +# armv7em generic cmake options +# diff --git a/infra/nnfw/CMakeLists.txt b/infra/nnfw/CMakeLists.txt index 897a16f..2a27eee 100644 --- a/infra/nnfw/CMakeLists.txt +++ b/infra/nnfw/CMakeLists.txt @@ -55,6 +55,12 @@ macro(nnas_find_package PREFIX) ) endmacro(nnas_find_package) +# C++14 feature requires 5 or later +# Using std::unordered_map shows build fail under 6.2 +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2) + message(FATAL_ERROR "Runtime build requires GNU Compiler version 6.2 or later.") +endif() + set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/infra/nnfw/cmake/ApplyCompileFlags.cmake b/infra/nnfw/cmake/ApplyCompileFlags.cmake index b042b0c..b1c7ff5 100644 --- a/infra/nnfw/cmake/ApplyCompileFlags.cmake +++ b/infra/nnfw/cmake/ApplyCompileFlags.cmake @@ -31,3 +31,13 @@ endforeach() foreach(FLAG ${FLAGS_CXXONLY}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") endforeach() + +# lib pthread as a variable (finding pthread build option must be disabled on android) +# Define here to use on external lib build +set(LIB_PTHREAD lib_pthread) +add_library(${LIB_PTHREAD} INTERFACE) +if(NOT TARGET_OS STREQUAL "android") + # Get compile option (ex.
"-pthread" on linux GNU build tool) + find_package(Threads) + target_link_libraries(${LIB_PTHREAD} INTERFACE Threads::Threads) +endif() diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake index 5371120..440f185 100644 --- a/infra/nnfw/cmake/CfgOptionFlags.cmake +++ b/infra/nnfw/cmake/CfgOptionFlags.cmake @@ -31,6 +31,8 @@ option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON) option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON) option(INSTALL_TEST_SCRIPTS "Install test scripts" ON) option(BUILD_GPU_CL "Build gpu_cl backend" OFF) +option(BUILD_NPUD "Build NPU daemon" ON) +option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" ON) # # Default build configuration for contrib # @@ -72,9 +74,10 @@ option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON) option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON) option(BUILD_BOOST "Build boost source" OFF) option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON) -option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF) +option(BUILD_TENSORFLOW_LITE_2_8_0 "Build TensorFlow Lite 2.8.0 from the downloaded source" OFF) option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF) option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON) +option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF) option(BUILD_RUY "Build ruy library from the downloaded source" ON) option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON) option(PROFILE_RUY "Enable ruy library profiling" OFF) diff --git a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake index e0c81de..fb63b3c 100644 --- a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake +++ 
b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake @@ -1,8 +1,5 @@ include("cmake/buildtool/config/config_linux.cmake") -# On Android, pthread is contained in bionic(libc) -set(LIB_PTHREAD "") - # SIMD for aarch64 set(FLAGS_COMMON ${FLAGS_COMMON} "-ftree-vectorize" diff --git a/infra/nnfw/cmake/buildtool/config/config_linux.cmake b/infra/nnfw/cmake/buildtool/config/config_linux.cmake index 86dd0f2..01b47ef 100644 --- a/infra/nnfw/cmake/buildtool/config/config_linux.cmake +++ b/infra/nnfw/cmake/buildtool/config/config_linux.cmake @@ -2,20 +2,11 @@ # linux common compile options # -# remove warning from arm cl +# Remove warning: ignoring attributes on template argument (ACL, Eigen, etc) # https://github.com/ARM-software/ComputeLibrary/issues/330 -set(GCC_VERSION_DISABLE_WARNING 6.0) -if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER GCC_VERSION_DISABLE_WARNING) - message(STATUS "GCC version higher than ${GCC_VERSION_DISABLE_WARNING}") - set(FLAGS_CXXONLY ${FLAGS_CXXONLY} - "-Wno-ignored-attributes" - ) -endif() +set(FLAGS_CXXONLY ${FLAGS_CXXONLY} "-Wno-ignored-attributes") # Disable annoying ABI compatibility warning. 
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) list(APPEND FLAGS_CXXONLY "-Wno-psabi") endif() - -# lib pthread as a variable (pthread must be disabled on android) -set(LIB_PTHREAD pthread) diff --git a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake index dbd45fc..52d6c6b 100644 --- a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake +++ b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake @@ -7,6 +7,3 @@ message(STATUS "Building for x86-64 Darwin") set(FLAGS_COMMON ${FLAGS_COMMON} "-msse4" ) - -# lib pthread as a variable (pthread must be disabled on android) -set(LIB_PTHREAD pthread) diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake index 3356aa7..07b26a9 100644 --- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake +++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake @@ -21,12 +21,6 @@ endif() set(CMAKE_SYSROOT ${ROOTFS_DIR}) set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR}) -set(CMAKE_SHARED_LINKER_FLAGS - "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) -set(CMAKE_EXE_LINKER_FLAGS - "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) # search for programs in the build host directories set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake index 4d5d7ac..cab7325 100644 --- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake +++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake @@ -23,12 +23,6 @@ endif() set(CMAKE_SYSROOT ${ROOTFS_DIR}) set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR}) -set(CMAKE_SHARED_LINKER_FLAGS - "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) 
-set(CMAKE_EXE_LINKER_FLAGS - "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) # search for programs in the build host directories set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake index 8f2cb67..c69259f 100644 --- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake +++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake @@ -21,12 +21,6 @@ endif() set(CMAKE_SYSROOT ${ROOTFS_DIR}) set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR}) -set(CMAKE_SHARED_LINKER_FLAGS - "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) -set(CMAKE_EXE_LINKER_FLAGS - "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) # search for programs in the build host directories set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake index 72513cd..181415d 100644 --- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake +++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake @@ -23,12 +23,6 @@ endif() set(CMAKE_SYSROOT ${ROOTFS_DIR}) set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR}) -set(CMAKE_SHARED_LINKER_FLAGS - "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) -set(CMAKE_EXE_LINKER_FLAGS - "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}" - CACHE INTERNAL "" FORCE) # search for programs in the build host directories set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) @@ -46,10 +40,6 @@ add_compile_options(-mfpu=neon-vfpv4) add_compile_options(-mfloat-abi=softfp) add_compile_options(--sysroot=${ROOTFS_DIR}) -set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}") - -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}") - include_directories(SYSTEM 
${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/) include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/armv7l-tizen-linux-gnueabi) add_compile_options(-Wno-deprecated-declarations) # compile-time option diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake index 9332f52..e95ccca 100644 --- a/infra/nnfw/cmake/options/options_aarch64-android.cmake +++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake @@ -10,3 +10,5 @@ option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF) option(DOWNLOAD_BOOST "Download boost source" ON) option(BUILD_BOOST "Build boost source" ON) option(BUILD_LOGGING "Build logging runtime" OFF) +# Do not support npud +option(BUILD_NPUD "Build NPU daemon" OFF) diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake index eab3b0a..9b487d9 100644 --- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake +++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake @@ -9,6 +9,7 @@ option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF) option(BUILD_LOGGING "Build logging runtime" OFF) option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF) option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF) +option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF) option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON) option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON) diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake index 31b7fd6..eea3722 100644 --- a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake +++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake @@ -2,6 +2,7 @@ # x86_64 linux cmake options # option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF) 
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF) option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF) option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF) diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake index 6ae7dea..f6a4efd 100644 --- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake +++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake @@ -90,11 +90,11 @@ function(_ARMCompute_Build ARMComputeInstall_DIR) return() endif(NOT SCONS_PATH) - if(CMAKE_BUILD_TYPE) - string(TOLOWER "${CMAKE_BUILD_TYPE}" SCON_BUILD_TYPE) - else(CMAKE_BUILD_TYPE) + if(DEBUG_ARMCOMPUTE) + set(SCON_BUILD_TYPE "debug") + else(DEBUG_ARMCOMPUTE) set(SCON_BUILD_TYPE "release") - endif(CMAKE_BUILD_TYPE) + endif(DEBUG_ARMCOMPUTE) #### Architecture-specific configurations diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake index 878026d..dddec89 100644 --- a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake +++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake @@ -16,14 +16,18 @@ function(_CpuInfo_Build) nnas_include(ExternalProjectTools) - set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "Build command-line tools") - set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "Build cpuinfo unit tests") - set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "Build cpuinfo mock tests") - set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "Build cpuinfo micro-benchmarks") + # Set build option + # - Static (position independent) + # - No logging + # - Library only (CPUINFO_RUNTIME_TYPE is not used) + set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "") + set(CPUINFO_LOG_LEVEL "none" CACHE STRING "") + set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "") + set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "") + set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "") + set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "") add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL) 
set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON) - # Suppress warnings generated by clog - set_target_properties(clog PROPERTIES COMPILE_FLAGS "-Wno-unused-result") set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE) set(CpuInfo_FOUND TRUE PARENT_SCOPE) endfunction(_CpuInfo_Build) diff --git a/infra/nnfw/cmake/packages/GLib2.0Config.cmake b/infra/nnfw/cmake/packages/GLib2.0Config.cmake new file mode 100644 index 0000000..d4c6bf2 --- /dev/null +++ b/infra/nnfw/cmake/packages/GLib2.0Config.cmake @@ -0,0 +1,41 @@ +function(_GLIB_2_0_import) + find_library(GLIB_LIBRARIES + NAMES glib-2.0) + + get_filename_component(GLIB_LIBRARY_DIR ${GLIB_LIBRARIES} DIRECTORY) + find_path(GLIBCONFIG_INCLUDE_DIR + NAMES glibconfig.h + PATHS ${GLIB_LIBRARY_DIR} + PATH_SUFFIXES glib-2.0/include + NO_CMAKE_FIND_ROOT_PATH) + + find_path(GLIB_INCLUDE_DIR + NAMES glib.h + PATH_SUFFIXES glib-2.0) + + set(GLIB_FOUND TRUE) + + if(NOT GLIB_LIBRARIES) + set(GLIB_FOUND FALSE) + endif(NOT GLIB_LIBRARIES) + + if(NOT GLIBCONFIG_INCLUDE_DIR) + set(GLIB_FOUND FALSE) + endif(NOT GLIBCONFIG_INCLUDE_DIR) + + if(NOT GLIB_INCLUDE_DIR) + set(GLIB_FOUND FALSE) + endif(NOT GLIB_INCLUDE_DIR) + + set(GLIB_INCLUDE_DIRS ${GLIB_INCLUDE_DIR} ${GLIBCONFIG_INCLUDE_DIR}) + + if(NOT GLIB_FOUND) + message(STATUS "Failed to find GLib 2.0") + endif(NOT GLIB_FOUND) + + set(GLIB2.0_FOUND ${GLIB_FOUND} PARENT_SCOPE) + set(GLIB2.0_INCLUDE_DIRS ${GLIB_INCLUDE_DIRS} PARENT_SCOPE) + set(GLIB2.0_LIBRARIES ${GLIB_LIBRARIES} PARENT_SCOPE) +endfunction(_GLIB_2_0_import) + +_GLIB_2_0_import() diff --git a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt index 9140a17..a1c4656 100644 --- a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt +++ b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt @@ -1,4 +1,4 @@ -set(RUY_BASE ${RuySource_DIR}/ruy) +set(RUY_BASE ${TensorFlowRuySource_DIR}/ruy) # # Ruy library @@ -14,7 +14,6 @@ list(REMOVE_ITEM RUY_SRCS 
"${RUY_BASE}/example_advanced.cc") list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/tune_tool.cc") list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/pmu.cc") list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/create_trmul_params.cc") -list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/prepare_packed_matrices.cc") list(APPEND RUY_INSTRUMENTATION_SRCS "${RUY_BASE}/profiler/instrumentation.cc") @@ -23,7 +22,7 @@ if(PROFILE_RUY) list(APPEND RUY_PROFILER_SRCS "${RUY_BASE}/profiler/treeview.cc") endif(PROFILE_RUY) -list(APPEND RUY_INCLUDES "${RuySource_DIR}") +list(APPEND RUY_INCLUDES "${TensorFlowRuySource_DIR}") add_library(ruy STATIC ${RUY_SRCS}) target_include_directories(ruy SYSTEM PUBLIC ${RUY_INCLUDES}) diff --git a/infra/nnfw/cmake/packages/RuyConfig.cmake b/infra/nnfw/cmake/packages/RuyConfig.cmake index 4e7cc24a..6f5f4b7 100644 --- a/infra/nnfw/cmake/packages/RuyConfig.cmake +++ b/infra/nnfw/cmake/packages/RuyConfig.cmake @@ -5,14 +5,14 @@ function(_Ruy_Build) return() endif(TARGET ruy) - nnas_find_package(RuySource QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.8 QUIET) nnfw_find_package(CpuInfo QUIET) - if(NOT RuySource_FOUND) + if(NOT TensorFlowRuySource_FOUND) message(STATUS "RUY: Source not found") set(Ruy_FOUND FALSE PARENT_SCOPE) return() - endif(NOT RuySource_FOUND) + endif(NOT TensorFlowRuySource_FOUND) if (NOT CpuInfo_FOUND) message(STATUS "RUY: CPUINFO not found") @@ -20,6 +20,17 @@ function(_Ruy_Build) return() endif(NOT CpuInfo_FOUND) + # Ruy's cmake requires cmake >= 3.14 + # If we ready cmake >= 3.14, enable below comment out code + #if(PROFILE_RUY) + # # Will be used on ruy build + # set(RUY_PROFILER ON) + #endif(PROFILE_RUY) + #add_extdirectory("${RuySource_DIR}" Ruy) + # + ## Ignore warning from ruy + #target_compile_options(ruy INTERFACE -Wno-comment) + add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Ruy" ruy) set(Ruy_FOUND TRUE PARENT_SCOPE) endfunction(_Ruy_Build) diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt 
b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt index 9a7b240..f872b88 100644 --- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt +++ b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt @@ -52,6 +52,12 @@ target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FA set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON) target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl) +# Define TF_LITE_DISABLE_X86_NEON for debug build +# If we upgrade NEON2SSE version, we can remove below line +if(NEON2SSESource_FOUND) + target_compile_definitions(tensorflow-lite PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>) +endif(NEON2SSESource_FOUND) + if(ANDROID) target_link_libraries(tensorflow-lite log) target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..") diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt deleted file mode 100644 index afee6e1..0000000 --- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt +++ /dev/null @@ -1,96 +0,0 @@ -# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile -# -# Tensorflow Lite library 2.3.0 -# -set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite) - -file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" - "${TENSORFLOW_LITE_BASE}/*.cc" - "${TENSORFLOW_LITE_BASE}/core/*.cc") - -file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc") - -file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc") - -file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" - "${TENSORFLOW_LITE_BASE}/core/api/*.cc") - -list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc") -list(APPEND
TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc") - -file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc") - -file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc") - -list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS}) -list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS}) - -# externals -list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c") -list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c") - -# Build with mmap? true -# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch. -set(BUILD_WITH_MMAP TRUE) -if(${BUILD_WITH_MMAP}) - list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc") -else() - list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc") -endif() - -# Build with nnapi? true -# caution: this nnapi delegate comes from tflite, not ours. 
-set(BUILD_WITH_NNAPI TRUE) -if(${BUILD_WITH_NNAPI}) - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc") - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc") - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc") - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc") -else() - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc") - list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc") -endif() - -# ios: we don't support ios -list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc") - -# android -if(NOT ANDROID) - list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc") -endif() - -# exclude some source files -file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc" - "${TENSORFLOW_LITE_BASE}/*benchmark*.cc" - "${TENSORFLOW_LITE_BASE}/*example*.cc" - "${TENSORFLOW_LITE_BASE}/*tool*.cc") -list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS}) - -# include headers -list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}") -list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}") -list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include") - -if(NEON2SSESource_FOUND) - list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}") -endif(NEON2SSESource_FOUND) - -add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS}) -target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES}) -target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR}) -target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO") -set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON) -target_link_libraries(tensorflow-lite-2.3.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl) -if(NOT ANDROID AND 
${BUILD_WITH_NNAPI}) - target_link_libraries(tensorflow-lite-2.3.0 rt) -endif() - -if(ANDROID) - target_link_libraries(tensorflow-lite-2.3.0 log) - target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..") -endif() diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake deleted file mode 100644 index c81958c..0000000 --- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake +++ /dev/null @@ -1,44 +0,0 @@ -if(BUILD_TENSORFLOW_LITE_2_3_0) - macro(return_unless VAR) - if(NOT ${VAR}) - message("TFLiteVanillaRun: ${VAR} NOT TRUE") - set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE) - return() - endif(NOT ${VAR}) - endmacro(return_unless) - - nnas_include(ExternalSourceTools) - nnas_include(OptionTools) - - nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET) - return_unless(TensorFlowSource_FOUND) - - # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl - nnas_find_package(AbseilSource QUIET) - return_unless(AbseilSource_FOUND) - nnfw_find_package(Eigen QUIET) - return_unless(Eigen_FOUND) - nnas_find_package(Farmhash QUIET) - return_unless(Farmhash_FOUND) - nnfw_find_package(FlatBuffers QUIET) - return_unless(FlatBuffers_FOUND) - nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) - return_unless(TensorFlowGEMMLowpSource_FOUND) - nnas_find_package(OouraFFTSource QUIET) - return_unless(OouraFFTSource_FOUND) - nnfw_find_package(Ruy QUIET) - return_unless(Ruy_FOUND) - - # TensorFlow Lite requires FP16 library's header only - nnas_find_package(Fp16Source QUIET) - return_unless(Fp16Source_FOUND) - - # Optional packages - nnas_find_package(NEON2SSESource QUIET) - - nnas_include(ExternalProjectTools) - add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.3.0) - - set(TensorFlowLite_2_3_0_FOUND TRUE) - return() -endif() diff --git 
a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake deleted file mode 100644 index 08e6374..0000000 --- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake +++ /dev/null @@ -1,9 +0,0 @@ -set(PACKAGE_VERSION "2.3.0") -set(PACKAGE_VERSION_EXACT FALSE) -set(PACKAGE_VERSION_COMPATIBLE FALSE) -set(PACKAGE_VERSION_UNSUITABLE TRUE) - -if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) - set(PACKAGE_VERSION_EXACT TRUE) - set(PACKAGE_VERSION_UNSUITABLE FALSE) -endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt new file mode 100644 index 0000000..d7e1d06 --- /dev/null +++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt @@ -0,0 +1,121 @@ +# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile +# +# Tensorflow Lite library 2.3.0 +# +set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite) + +file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" + "${TENSORFLOW_LITE_BASE}/*.cc" + "${TENSORFLOW_LITE_BASE}/core/*.cc") + +file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc") + +file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc") + +file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" + "${TENSORFLOW_LITE_BASE}/core/api/*.cc") + +list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc") +list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc") +list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/platform_profiler.cc") + +file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc") + +file(GLOB 
TFLITE_SCHEMA_UTIL_SRCS "${TENSORFLOW_LITE_BASE}/schema/*.cc") + +# Moved to kerenls/internal/utils +#file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc") + +list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS}) +#list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS}) +list(APPEND TFLITE_SRCS ${TFLITE_SCHEMA_UTIL_SRCS}) + +# externals +list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c") +list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c") + +# Build with mmap? true +# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch. +set(BUILD_WITH_MMAP TRUE) +if(${BUILD_WITH_MMAP}) + list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc") +else() + list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc") +endif() + +# Build with nnapi? true +# caution: this nnapi delegate comes from tflite, not ours. 
+set(BUILD_WITH_NNAPI TRUE) +if(${BUILD_WITH_NNAPI}) + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/utils.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/serialization.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc") +else() + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc") + list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc") +endif() + +# ios: we don't support ios +list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc") + +# android +if(NOT ANDROID) + list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc") +endif() + +# exclude some source files +file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc" + "${TENSORFLOW_LITE_BASE}/*benchmark*.cc" + "${TENSORFLOW_LITE_BASE}/*example*.cc" + "${TENSORFLOW_LITE_BASE}/*tool*.cc") +list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS}) + +# exclude some kernels (requires python3-dev package) +# TODO Enable these kernels by installing package on build system +file(GLOB_RECURSE TFLITE_KERNEL_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/variable_ops_wrapper.cc" + "${TENSORFLOW_LITE_BASE}/kernels/gradient/*.cc" + "${TENSORFLOW_LITE_BASE}/kernels/perception/*.cc") +list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_KERNEL_EXCLS}) + +# exclude kernel shim +file(GLOB_RECURSE TFLITE_SHIM_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/shim/*.cc") +list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_SHIM_EXCLS}) + +# include headers +list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}") +list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}") +list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include") +#list(APPEND 
TFLITE_INCLUDES "${Pybind11Source_DIR}/include") + +if(NEON2SSESource_FOUND) + list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}") +endif(NEON2SSESource_FOUND) + +add_library(tensorflow-lite-2.8.0 STATIC ${TFLITE_SRCS}) +target_include_directories(tensorflow-lite-2.8.0 SYSTEM PUBLIC ${TFLITE_INCLUDES}) +target_include_directories(tensorflow-lite-2.8.0 PRIVATE ${CpuInfoSource_DIR}) +target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO") +set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON) +target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl) +if(NOT ANDROID AND ${BUILD_WITH_NNAPI}) + target_link_libraries(tensorflow-lite-2.8.0 rt) +endif() + +# Define TF_LITE_DISABLE_X86_NEON for debug build +# If we upgrade NEON2SSE version, we can remove below line +if(NEON2SSESource_FOUND) + target_compile_definitions(tensorflow-lite-2.8.0 PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>) +endif(NEON2SSESource_FOUND) + +if(ANDROID) + target_link_libraries(tensorflow-lite-2.8.0 log) + target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..") +endif() diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake new file mode 100644 index 0000000..1c80618 --- /dev/null +++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake @@ -0,0 +1,50 @@ +if(BUILD_TENSORFLOW_LITE_2_8_0) + macro(return_unless VAR) + if(NOT ${VAR}) + message("TFLite 2.8: ${VAR} NOT TRUE") + set(TensorFlowLite_2_8_0_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${VAR}) + endmacro(return_unless) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET) + return_unless(TensorFlowSource_FOUND) + + # Below urls come from
https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl + nnas_find_package(AbseilSource QUIET) + return_unless(AbseilSource_FOUND) + nnfw_find_package(Eigen QUIET) + return_unless(Eigen_FOUND) + nnas_find_package(Farmhash QUIET) + return_unless(Farmhash_FOUND) + nnfw_find_package(FlatBuffers QUIET) + return_unless(FlatBuffers_FOUND) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET) + return_unless(TensorFlowGEMMLowpSource_FOUND) + nnas_find_package(OouraFFTSource QUIET) + return_unless(OouraFFTSource_FOUND) + nnfw_find_package(Ruy QUIET) + return_unless(Ruy_FOUND) + + # TensorFlow Lite requires FP16 library's header only + nnas_find_package(Fp16Source QUIET) + return_unless(Fp16Source_FOUND) + + # TensorFlow Lite requires Pybind11 library's header only + # But Pybind11 requires python3-dev package + # TODO Enable below by installing package on build system + #nnas_find_package(Pybind11Source QUIET) + #return_unless(Pybind11Source_FOUND) + + # Optional packages + nnas_find_package(NEON2SSESource QUIET) + + nnas_include(ExternalProjectTools) + add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0) + + set(TensorFlowLite_2_8_0_FOUND TRUE) + return() +endif() diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake new file mode 100644 index 0000000..cd49d7b --- /dev/null +++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake @@ -0,0 +1,9 @@ +set(PACKAGE_VERSION "2.8.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/nnfw/config/gbs.conf b/infra/nnfw/config/gbs.conf index 
1150a5f..2b5994a 100644 --- a/infra/nnfw/config/gbs.conf +++ b/infra/nnfw/config/gbs.conf @@ -3,20 +3,11 @@ profile = profile.tizen [profile.tizen] -user=obs_viewer -obs = obs.tizen -repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile +repos = repo.tizen_base,repo.tizen_mobile buildroot = /home/GBS-ROOT/ -[obs.tizen] -url = http://api.tizen.org - [repo.tizen_mobile] url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/ [repo.tizen_base] url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/ - -[repo.tizen_one] -url = http://13.125.34.93/archive/tizen/ - diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323 index 421106c..0eac106 100644 --- a/infra/packaging/preset/20220323 +++ b/infra/packaging/preset/20220323 @@ -20,21 +20,26 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04") + REQUIRED_UNITS+=("mio-tflite280" "mio-circle04") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Circle compiler library (.circle -> .circle) REQUIRED_UNITS+=("luci") # Tools - REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef") + REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef") REQUIRED_UNITS+=("circle-tensordump" "circledump") - REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify") + REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter") REQUIRED_UNITS+=("luci-eval-driver") REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5") - REQUIRED_UNITS+=("circle-partitioner") + REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter") + REQUIRED_UNITS+=("circle-partitioner" "circle-operator") REQUIRED_UNITS+=("one-cmds") REQUIRED_UNITS+=("bcq-tools") + # Dependent modules needed for build + REQUIRED_UNITS+=("circlechef") + REQUIRED_UNITS+=("circle-verify") + NPROC=${NPROC:-$(cat 
/proc/cpuinfo | grep -c processor)} # TODO Use "nncc configure" and "nncc build" diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows index 60500b1..14917b3 100644 --- a/infra/packaging/preset/20220323_windows +++ b/infra/packaging/preset/20220323_windows @@ -15,20 +15,26 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04") + REQUIRED_UNITS+=("mio-tflite280" "mio-circle04") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Circle compiler library (.circle -> .circle) REQUIRED_UNITS+=("luci") # Tools - REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef") - REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify") + REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef") + REQUIRED_UNITS+=("circle-tensordump" "circledump") + REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter") REQUIRED_UNITS+=("luci-eval-driver") REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5") - REQUIRED_UNITS+=("circle-partitioner") + REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter") + REQUIRED_UNITS+=("circle-partitioner" "circle-operator") REQUIRED_UNITS+=("one-cmds") REQUIRED_UNITS+=("bcq-tools") + # Dependent modules needed for build + REQUIRED_UNITS+=("circlechef") + REQUIRED_UNITS+=("circle-verify") + NPROC=$(cat /proc/cpuinfo | grep -c processor) # TODO Use "nncc configure" and "nncc build" diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323 index 0d44818..5f43b23 100644 --- a/infra/packaging/res/tf2nnpkg.20220323 +++ b/infra/packaging/res/tf2nnpkg.20220323 @@ -104,6 +104,6 @@ fi ${ONE_IMPORT_BCQ_SCRIPT} # optimize -"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle" +"${ROOT}/bin/circle2circle" --resolve_customop_add 
"${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle" "${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle" diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh index 6a857d2..51cba92 100644 --- a/infra/scripts/compiler_modules.sh +++ b/infra/scripts/compiler_modules.sh @@ -1,5 +1,8 @@ #!/bin/bash +# NOTE this file is sourced from, for the purpose of +# - configure_compiler_coverage.sh: to get test coverage for release criteria + # Don't run this script [[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return @@ -8,13 +11,14 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec" DEBUG_BUILD_ITEMS+=";hermes;hermes-std" DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo" DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone" -DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite;mio-tflite260;mio-tflite280" +DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite280;dio-hdf5" DEBUG_BUILD_ITEMS+=";tflite2circle" DEBUG_BUILD_ITEMS+=";luci" DEBUG_BUILD_ITEMS+=";luci-interpreter" DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test" DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer" -DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver" +DEBUG_BUILD_ITEMS+=";circle-eval-diff" +DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator" DEBUG_BUILD_ITEMS+=";circle-verify" DEBUG_BUILD_ITEMS+=";circle-tensordump" DEBUG_BUILD_ITEMS+=";tflchef;circlechef" @@ -25,3 +29,5 @@ DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test" DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test" DEBUG_BUILD_ITEMS+=";pota-quantization-value-test" DEBUG_BUILD_ITEMS+=";circle-part-value-test" +DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test" +DEBUG_BUILD_ITEMS+=";circle-operator-test" diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh index 7146141..2e603b5 
100755 --- a/infra/scripts/docker_build_nncc.sh +++ b/infra/scripts/docker_build_nncc.sh @@ -27,13 +27,13 @@ else fi # prepare tensorflow -if [ -d $TENSORFLOW_PREFIX ]; then +if [ -n "$TENSORFLOW_PREFIX" ]; then DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow" CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow" fi # prepare onnx -if [ -d $ONNXRUNTIME_PREFIX ]; then +if [ -n "$ONNXRUNTIME_PREFIX" ]; then DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime" CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime" fi diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh index 26d8de4..b3428e0 100755 --- a/infra/scripts/docker_build_test_x64.sh +++ b/infra/scripts/docker_build_test_x64.sh @@ -32,8 +32,8 @@ pushd $ROOT_PATH > /dev/null export DOCKER_ENV_VARS export DOCKER_VOLUMES export BUILD_OPTIONS -# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5 -CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF $BUILD_OPTIONS' && \ + +CMD="export OPTIONS='$BUILD_OPTIONS' && \ export BUILD_TYPE=Release && \ cp -nv Makefile.template Makefile && \ make all install build_test_suite" diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh index 06cf880..afdd3b9 100755 --- a/infra/scripts/docker_collect_nnpkg_resources.sh +++ b/infra/scripts/docker_collect_nnpkg_resources.sh @@ -28,13 +28,13 @@ else fi # prepare tensorflow -if [ -d $TENSORFLOW_PREFIX ]; then +if [ -n "$TENSORFLOW_PREFIX" ]; then DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow" CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow" fi # prepare onnx -if [ -d $ONNXRUNTIME_PREFIX ]; then +if [ -n "$ONNXRUNTIME_PREFIX" ]; then DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime" CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime" fi @@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Circle compiler library (.circle -> .circle) 
REQUIRED_UNITS+=("luci") # Flatbuffer I/O -REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04") +REQUIRED_UNITS+=("mio-tflite280" "mio-circle04") # Tools REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter") REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify") diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh index 697fed8..2510d9c 100755 --- a/infra/scripts/test_ubuntu_runtime_mixed.sh +++ b/infra/scripts/test_ubuntu_runtime_mixed.sh @@ -55,8 +55,8 @@ echo "GeneratedTests.squeeze_relaxed" >> $SKIPLIST_PREFIX.union # Run the test export OP_BACKEND_Conv2D="cpu" -export OP_BACKEND_MaxPool2D="acl_cl" -export OP_BACKEND_AvgPool2D="acl_neon" +export OP_BACKEND_Pool2D="acl_cl" +export OP_BACKEND_FullyConnected="acl_neon" export ACL_LAYOUT="NCHW" export RUY_THREADS=4 NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed" diff --git a/infra/scripts/unittest_compiler_xml.sh b/infra/scripts/unittest_compiler_xml.sh index 46d3bc8..6e9e8ad 100755 --- a/infra/scripts/unittest_compiler_xml.sh +++ b/infra/scripts/unittest_compiler_xml.sh @@ -7,7 +7,9 @@ set -eo pipefail CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_PATH="$CURRENT_PATH/../../" NNCC_WORKSPACE=${NNCC_WORKSPACE:-${ROOT_PATH}build} -UNITTEST_REPORT_DIR=${NNCC_WORKSPACE}/unittest_compiler_xml + +# Use fixed absolute report dir for CI +UNITTEST_REPORT_DIR=${ROOT_PATH}build/unittest_compiler_xml for i in "$@" do @@ -25,5 +27,10 @@ fi for TEST_BIN in `find ${NNCC_WORKSPACE}/compiler -type f -executable -name *_test`; do TEST_NAME="$(basename -- $TEST_BIN)" - LUGI_LOG=999 $TEST_BIN --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml" + TEST_DIR="$(dirname $TEST_BIN)" + + # Execute on test directory to find related file + pushd $TEST_DIR > /dev/null + LUGI_LOG=999 ./$TEST_NAME --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml" + 
popd > /dev/null done diff --git a/nnpackage/examples/README.md b/nnpackage/examples/README.md index fb0bae3..951048b 100644 --- a/nnpackage/examples/README.md +++ b/nnpackage/examples/README.md @@ -1,5 +1,12 @@ # NNPackage example +## Package version 1.3.0 + +### two_tflites + +- Model file: two TensorFlow Lite models +- It has two tflite models with pkg-input, pkg-output and model-connect fields. + ## Package version 1.1.0 ### one_op_in_tflite diff --git a/nnpackage/examples/v1.3.0/two_tflites/README.md b/nnpackage/examples/v1.3.0/two_tflites/README.md new file mode 100644 index 0000000..3fcbe2d --- /dev/null +++ b/nnpackage/examples/v1.3.0/two_tflites/README.md @@ -0,0 +1,28 @@ +## How to create + +``` +$ wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz +$ tar -zxf mobilenet_v1_1.0_224.tgz + +$ python tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite <( echo 0-1 ) mv1.0_1.tflite +$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 0 ) mv1.0.tflite +$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 1 ) mv1.1.tflite + +# make sure three tflite is valid +$ ./Product/out/bin/tflite_comparator mv1.0_1.tflite +$ ./Product/out/bin/tflite_comparator mv1.0.tflite +$ ./Product/out/bin/tflite_comparator mv1.1.tflite + +$ tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -m mv1.0.tflite mv1.1.tflite -p two_tflites +$ cat two_tflites/metadata/MANIFEST +{ + "major-version" : "1", + "minor-version" : "2", + "patch-version" : "0", + "configs" : [ ], + "models" : [ "mv1.0.tflite", "mv1.1.tflite" ], + "model-types" : [ "tflite", "tflite" ] +} + +# update minor-version, and add additional fields manually +``` diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST new file mode 100644 index 0000000..9d9e21a --- /dev/null +++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST @@ 
-0,0 +1,11 @@ +{ + "major-version" : "1", + "minor-version" : "3", + "patch-version" : "0", + "configs" : [ ], + "models" : [ "mv1.0.tflite", "mv1.1.tflite" ], + "model-types" : [ "tflite", "tflite" ], + "pkg-inputs" : [ "0:0:0" ], + "pkg-outputs" : [ "1:0:0" ], + "model-connect" : [ { "from" : "0:0:0", "to" : [ "1:0:0" ] } ] +} diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 new file mode 100644 index 0000000..59a6b90 Binary files /dev/null and b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 differ diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 new file mode 100644 index 0000000..2251157 Binary files /dev/null and b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 differ diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite new file mode 100644 index 0000000..03f30c7 Binary files /dev/null and b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite differ diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite new file mode 100644 index 0000000..e3b4f8d Binary files /dev/null and b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite differ diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs index 3972056..8ad444d 100644 --- a/nnpackage/schema/circle_schema.fbs +++ b/nnpackage/schema/circle_schema.fbs @@ -1,4 +1,4 @@ -// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved +// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,6 +28,7 @@ // `asymmetric_quantize_inputs` for several operator options // Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added. // Version 0.3: SHUFFLED16x1FLOAT32 is added. +// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema. namespace circle; @@ -52,6 +53,14 @@ enum TensorType : byte { COMPLEX64 = 8, INT8 = 9, FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental, that are subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, } // Custom quantization parameters for experimenting with new quantization @@ -224,8 +233,11 @@ table Tensor { // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. - -enum BuiltinOperator : ubyte { +// LINT.IfChange +enum BuiltinOperator : int32 { + BCQ_GATHER = -4, + BCQ_FULLY_CONNECTED = -3, + INSTANCE_NORM = -2, ADD = 0, AVERAGE_POOL_2D = 1, CONCATENATION = 2, @@ -258,7 +270,6 @@ enum BuiltinOperator : ubyte { SPACE_TO_DEPTH = 26, SVDF = 27, TANH = 28, - // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS CONCAT_EMBEDDINGS = 29, SKIP_GRAM = 30, CALL = 31, @@ -360,10 +371,28 @@ enum BuiltinOperator : ubyte { DENSIFY = 124, SEGMENT_SUM = 125, BATCH_MATMUL = 126, - BCQ_GATHER = 252, - BCQ_FULLY_CONNECTED = 253, - INSTANCE_NORM = 254, -} + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, + BROADCAST_ARGS = 145, + RANDOM_STANDARD_NORMAL = 146, +} +// 
LINT.ThenChange(nnapi_linter/linter.proto) // Options for the builtin operators. union BuiltinOptions { @@ -468,6 +497,19 @@ union BuiltinOptions { DensifyOptions, SegmentSumOptions, BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + ReadVariableOptions, + AssignVariableOptions, + RandomOptions, BCQGatherOptions = 252, BCQFullyConnectedOptions = 253, InstanceNormOptions = 254, @@ -493,6 +535,18 @@ table Conv2DOptions { dilation_h_factor:int = 1; } +// Options for both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + table Pool2DOptions { padding:Padding; stride_w:int; @@ -599,6 +653,8 @@ table ConcatenationOptions { table AddOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; } table MulOptions { @@ -606,6 +662,7 @@ table MulOptions { } table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. fused_activation_function:ActivationFunctionType; } @@ -679,6 +736,7 @@ table ResizeBilinearOptions { table ResizeNearestNeighborOptions { align_corners: bool; + half_pixel_centers: bool; } // A call operation options @@ -719,6 +777,8 @@ table DepthToSpaceOptions { table SubOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; } table DivOptions { @@ -740,6 +800,8 @@ table EmbeddingLookupSparseOptions { table GatherOptions { axis: int; + // Parameters for Gather version 5 or above. 
+ batch_dims: int = 0; } table TransposeOptions { @@ -962,6 +1024,10 @@ table IfOptions { else_subgraph_index:int; } +table CallOnceOptions { + init_subgraph_index:int; +} + table WhileOptions { cond_subgraph_index:int; body_subgraph_index:int; @@ -988,6 +1054,54 @@ table SegmentSumOptions { table BatchMatMulOptions { adjoint_lhs:bool; adjoint_rhs:bool; + // Parameters for BatchMatMul version 4 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +table RandomOptions { + seed: int; + seed2: int; } table BCQGatherOptions { @@ -1008,12 +1122,21 @@ table InstanceNormOptions { // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { - builtin_code:BuiltinOperator; + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; custom_code:string; // The version of the operator. The version need to be bumped whenever new // parameters are introduced into an op. version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). 
+ // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; } enum CustomOptionsFormat : byte { @@ -1104,6 +1227,35 @@ table Metadata { buffer:uint; } +// Map from an alias name of tensor to tensor index in the graph. +// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph, that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Key value which was in the Tensorflow SavedModel SignatureDef map. + signature_key:string; + + // Model tag, deprecated. + deprecated_tag:string (deprecated); + + // Index of subgraphs that corresponds to the exported method. + subgraph_index:uint; +} + table Model { // Version of the schema. version:uint; @@ -1132,6 +1284,9 @@ table Model { // Metadata about the model. metadata:[Metadata]; + + // Optional SignatureDefs for the model. 
+ signature_defs:[SignatureDef]; } root_type Model; diff --git a/packaging/ABSEIL.tar.gz b/packaging/ABSEIL.tar.gz index e056541..dc7aab5 100644 Binary files a/packaging/ABSEIL.tar.gz and b/packaging/ABSEIL.tar.gz differ diff --git a/packaging/CPUINFO.tar.gz b/packaging/CPUINFO.tar.gz index ced5deb..a74fe35 100644 Binary files a/packaging/CPUINFO.tar.gz and b/packaging/CPUINFO.tar.gz differ diff --git a/packaging/FP16.tar.gz b/packaging/FP16.tar.gz index ebd2764..78c7876 100644 Binary files a/packaging/FP16.tar.gz and b/packaging/FP16.tar.gz differ diff --git a/packaging/RUY.tar.gz b/packaging/RUY.tar.gz deleted file mode 100644 index 9ad14fe..0000000 Binary files a/packaging/RUY.tar.gz and /dev/null differ diff --git a/packaging/TENSORFLOW-2.8.0-RUY.tar.gz b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz new file mode 100644 index 0000000..8e67347 Binary files /dev/null and b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz differ diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 324fe1d..0518541 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -1,6 +1,6 @@ Name: nnfw Summary: nnfw -Version: 1.20.0 +Version: 1.21.0 Release: 1 Group: Development License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0 @@ -12,7 +12,7 @@ Source1001: nnapi_test_generated.tar.gz #Source1002: GTEST.tar.gz Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz Source1004: GEMMLOWP.tar.gz -Source1005: RUY.tar.gz +Source1005: TENSORFLOW-2.8.0-RUY.tar.gz Source1006: CPUINFO.tar.gz Source1007: XNNPACK.tar.gz Source1008: FXDIV.tar.gz @@ -28,21 +28,26 @@ Source2001: nnfw.pc.in Source2002: nnfw-plugin.pc.in %{!?build_type: %define build_type Release} +%{!?npud_build: %define npud_build 1} %{!?trix_support: %define trix_support 1} %{!?coverage_build: %define coverage_build 0} %{!?test_build: %define test_build 0} %{!?extra_option: %define extra_option %{nil}} +%{!?config_support: %define config_support 1} + %if %{coverage_build} == 1 +# Coverage test requires debug build runtime +%define build_type Debug 
%define test_build 1 %endif BuildRequires: cmake # Require flatbuffers-devel for onert frontend (model loading) -BuildRequires: flatbuffers-devel +BuildRequires: pkgconfig(flatbuffers) %ifarch %{arm} aarch64 # Require python for acl-ex library build pre-process -BuildRequires: python +BuildRequires: python3 BuildRequires: libarmcl-devel >= v21.02 %endif @@ -50,17 +55,21 @@ Requires(post): /sbin/ldconfig Requires(postun): /sbin/ldconfig %if %{test_build} == 1 -BuildRequires: boost-devel -BuildRequires: tensorflow-lite-devel +BuildRequires: pkgconfig(boost) +BuildRequires: pkgconfig(tensorflow-lite) BuildRequires: hdf5-devel BuildRequires: libaec-devel -BuildRequires: zlib-devel -BuildRequires: libjpeg-devel +BuildRequires: pkgconfig(zlib) +BuildRequires: pkgconfig(libjpeg) BuildRequires: gtest-devel %endif +%if %{npud_build} == 1 +BuildRequires: pkgconfig(glib-2.0) +%endif + %if %{trix_support} == 1 -BuildRequires: npu-engine-devel +BuildRequires: pkgconfig(npu-engine) %endif %description @@ -91,7 +100,18 @@ Minimal test binary for VD manual test Summary: NNFW Test %description test -NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime. +NNFW test rpm. +If you want to use test package, you should install runtime package which is built with test build option +If you want to get coverage info, you should install runtime package which is built with coverage build option +# TODO Use release runtime package for test +%endif + +%if %{npud_build} == 1 +%package npud +Summary: NPU daemon + +%description npud +NPU daemon for optimal management of NPU hardware %endif %ifarch armv7l @@ -112,22 +132,40 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime. 
%define install_dir %{_prefix} %define install_path %{buildroot}%{install_dir} -%define build_env NNFW_WORKSPACE=build -%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on +%define nnfw_workspace build +%define build_env NNFW_WORKSPACE=%{nnfw_workspace} -# Set option for test build (and coverage test build) +# Path to install test bin and scripts (test script assumes path Product/out) +# TODO Share path with release package %define test_install_home /opt/usr/nnfw-test %define test_install_dir %{test_install_home}/Product/out %define test_install_path %{buildroot}%{test_install_dir} -%define coverage_option %{nil} + +# Set option for test build (and coverage test build) +%define option_test -DENABLE_TEST=OFF +%define option_coverage %{nil} %define test_suite_list infra/scripts tests/scripts -%define test_build_type %{build_type} + +%if %{test_build} == 1 +# ENVVAR_ONERT_CONFIG: Use environment variable for runtime core configuration and debug +%define option_test -DENABLE_TEST=ON -DENVVAR_ONERT_CONFIG=ON +%endif # test_build + +# Set option for configuration +%define option_config %{nil} +%if %{config_support} == 1 +%if %{npud_build} == 1 +# ENVVAR_NPUD_CONFIG: Use environment variable for npud configuration and debug +%define option_config -DENVVAR_NPUD_CONFIG=ON +%endif # npud_build +%endif # config_support + %if %{coverage_build} == 1 -%define coverage_option -DENABLE_COVERAGE=ON -%define test_build_type Debug -%endif -%define test_build_env NNFW_INSTALL_PREFIX=%{test_install_path} NNFW_WORKSPACE=build_for_test -%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_ONERT_CONFIG=ON +%define option_coverage -DENABLE_COVERAGE=ON +%endif # coverage_build + +%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DBUILD_MINIMAL_SAMPLE=ON \\\ + 
%{option_test} %{option_coverage} %{option_config} %{extra_option} %prep %setup -q @@ -153,17 +191,13 @@ tar -xf %{SOURCE1016} -C ./externals %build %ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 # runtime build -%{build_env} ./nnfw configure %{build_options} %{extra_option} +%{build_env} ./nnfw configure %{build_options} %{build_env} ./nnfw build -j4 # install in workspace # TODO Set install path %{build_env} ./nnfw install %if %{test_build} == 1 -# test runtime -# TODO remove duplicated build process -%{test_build_env} ./nnfw configure %{test_build_options} %{extra_option} -%{test_build_env} ./nnfw build -j4 %if %{coverage_build} == 1 pwd > tests/scripts/build_path.txt %endif # coverage_build @@ -195,19 +229,37 @@ install -m 0644 ./nnfw.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw.pc install -m 0644 ./nnfw-plugin.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw-plugin.pc %if %{test_build} == 1 -%{test_build_env} ./nnfw install +mkdir -p %{test_install_path}/bin +mkdir -p %{test_install_path}/unittest +mkdir -p %{test_install_path}/unittest_standalone +mkdir -p %{test_install_path}/test + +install -m 755 build/out/bin/nnapi_test %{test_install_path}/bin +install -m 755 build/out/bin/nnpackage_run %{test_install_path}/bin +install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin +install -m 755 build/out/bin/tflite_run %{test_install_path}/bin +install -m 755 build/out/unittest/* %{test_install_path}/unittest +install -m 755 build/out/unittest_standalone/*_test %{test_install_path}/unittest_standalone +install -m 755 build/out/unittest_standalone/test_* %{test_install_path}/unittest_standalone +cp -r build/out/test/* %{test_install_path}/test +cp -r build/out/unittest_standalone/nnfw_api_gtest_models %{test_install_path}/unittest_standalone + # Share test script with ubuntu (ignore error if there is no list for target) -cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/. 
-cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip +cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/unittest/. +cp %{test_install_path}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/unittest/nnapi_gtest.skip tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home} %if %{coverage_build} == 1 mkdir -p %{buildroot}%{test_install_home}/gcov -find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \; +find %{nnfw_workspace} -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \; install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt %endif # coverage_build %endif # test_build +%if %{npud_build} == 1 +install -m 755 build/out/bin/npud %{buildroot}%{_bindir} +%endif + %endif %post -p /sbin/ldconfig @@ -256,6 +308,15 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t %endif # arm armv7l armv7hl aarch64 %endif # test_build +%if %{npud_build} == 1 +%files npud +%manifest %{name}.manifest +%defattr(-,root,root,-) +%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 +%{_bindir}/npud +%endif # arm armv7l armv7hl aarch64 x86_64 %ix86 +%endif # npud_build + %changelog * Thu Mar 15 2018 Chunseok Lee - Initial spec file for nnfw diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe new file mode 100644 index 
0000000..b9c2ab8 --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe @@ -0,0 +1,43 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } +} +operand { + name: "gamma" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "beta" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } +} +operation { + type: "InstanceNorm" + input: "ifm" + input: "gamma" + input: "beta" + output: "ofm" + instance_norm_options { + epsilon: 0.00001 + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.rule b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule new file mode 100644 index 0000000..a17692d --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "GAMMA_S16" $(tensor_dtype gamma) '=' INT16 +RULE "BETA_S16" $(tensor_dtype beta) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe new file mode 100644 index 0000000..b9c2ab8 --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe @@ -0,0 +1,43 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } +} +operand { + name: "gamma" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "beta" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } +} +operation { + type: "InstanceNorm" + input: "ifm" + input: "gamma" + input: "beta" + output: "ofm" + instance_norm_options { + epsilon: 0.00001 + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.rule 
b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule new file mode 100644 index 0000000..e62dd48 --- /dev/null +++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "GAMMA_U8" $(tensor_dtype gamma) '=' UINT8 +RULE "BETA_U8" $(tensor_dtype beta) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe new file mode 100644 index 0000000..b31e160 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe @@ -0,0 +1,30 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 2 } +} +operand { + name: "ofm" + type: INT64 + shape { dim: 1 dim: 4 } +} +operand { + name: "argmax/dim" + type: INT32 + shape { } + filler { + tag: "explicit" + arg: "-1" + } +} +operation { + type: "ArgMax" + argmax_options { + output_type: INT64 + } + input: "ifm" + input: "argmax/dim" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Densify_000/test.recipe b/res/TensorFlowLiteRecipes/Densify_000/test.recipe new file mode 100644 index 0000000..480c52f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Densify_000/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse" + type: FLOAT32 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "0" arg: "0" 
arg: "0" + arg: "0" arg: "2" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "3" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "4" + } + make_sparse: true +} +operand { + name: "dense" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + type: "Densify" + input: "sparse" + output: "dense" +} +operation { + type: "Add" + input: "in" + input: "dense" + output: "out" + add_options { + activation: NONE + } +} +input: "in" +output: "out" diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe new file mode 100644 index 0000000..572badf --- /dev/null +++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "x" + type: FLOAT32 + shape { dim: 2 dim: 4 } +} +operand { + name: "y" + type: FLOAT32 + shape { dim: 2 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 2 dim: 2 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "x" + input: "y" + input: "" + output: "out" +} +input: "x" +input: "y" +output: "out" diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule new file mode 100644 index 0000000..01518e5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule @@ -0,0 +1,7 @@ +# To check if FullyConnected with non-const weight is replaced by MatMul +# with replace_non_const_fc_with_batch_matmul pass + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1 +RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0 diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe 
b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe new file mode 100644 index 0000000..ea604b2 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse" + type: FLOAT32 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "2" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "3" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "4" + } + make_sparse: true +} +operand { + name: "dense" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + type: "Densify" + input: "sparse" + output: "dense" +} +operation { + type: "Add" + input: "ifm" + input: "dense" + output: "ofm" + add_options { + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe new file mode 100644 index 0000000..6e1083f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe @@ -0,0 +1,54 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse16" + type: FLOAT16 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "2" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "3" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "4" + } + make_sparse: true +} +operand { + name: "dense16" + type: FLOAT16 + shape { dim: 4 dim: 4 } +} +operand { + name: "dense32" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + type: "Densify" + input: "sparse16" + output: "dense16" +} +operation { + type: "Dequantize" + input: "dense16" + output: "dense32" +} +operation { + type: "Add" + input: "ifm" + input: "dense32" + output: 
"ofm" + add_options { + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe new file mode 100644 index 0000000..5f212a7 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe @@ -0,0 +1,41 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "float16" + type: FLOAT16 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "dequantized" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Dequantize" + input: "float16" + output: "dequantized" +} +operation { + type: "Add" + input: "ifm" + input: "dequantized" + output: "ofm" + add_options { + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe index b3247f2..afb9a9c 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe @@ -12,9 +12,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } } operand { name: "Addition" @@ -25,9 +22,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "Addition_add_param" @@ -39,9 +33,6 @@ operand { tag: "explicit" arg: "-2.04724" } - quant { - quantized_dimension: 0 - } } operand { name: "Hole" @@ -52,11 +43,6 @@ operand { dim: 2 dim: 2 } - quant { - min: 0 - max: 255 - quantized_dimension: 0 - } } operand { name: "conv2d_transpose" @@ -67,9 +53,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "input_size" @@ -84,9 +67,6 @@ operand { arg: "4" arg: "1" } - quant { - quantized_dimension: 0 - } } operation { 
type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe index 89a344f..b1c9784 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe @@ -12,9 +12,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } } operand { name: "Addition" @@ -25,9 +22,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "Addition_add_param" @@ -45,9 +39,6 @@ operand { arg: "1" arg: "2" arg: "3" arg: "4" arg: "-1" arg: "-2" arg: "-3" arg: "-4" } - quant { - quantized_dimension: 0 - } } operand { name: "Hole" @@ -58,11 +49,6 @@ operand { dim: 2 dim: 2 } - quant { - min: 0 - max: 255 - quantized_dimension: 0 - } } operand { name: "conv2d_transpose" @@ -73,9 +59,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "input_size" @@ -90,9 +73,6 @@ operand { arg: "4" arg: "1" } - quant { - quantized_dimension: 0 - } } operation { type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe index cfea306..4265514 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe @@ -12,9 +12,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } } operand { name: "Addition" @@ -25,9 +22,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "Addition_add_param" @@ -39,9 +33,6 @@ operand { tag: "explicit" arg: "-2.04724" } - quant { - quantized_dimension: 0 - } } operand { name: "Hole" @@ -52,11 +43,6 @@ operand { dim: 2 dim: 2 } - quant { - min: 0 - max: 255 - quantized_dimension: 0 - } } operand { name: "conv2d_transpose" @@ -67,9 +53,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "input_size" @@ 
-84,9 +67,6 @@ operand { arg: "4" arg: "1" } - quant { - quantized_dimension: 0 - } } operation { type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe index 65248f2..ef329e1 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe @@ -12,9 +12,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3" @@ -25,9 +22,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_add_param" @@ -39,9 +33,6 @@ operand { tag: "explicit" arg: "-2.04724" } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_mul_0" @@ -52,9 +43,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_mul_0_param" @@ -66,9 +54,6 @@ operand { tag: "explicit" arg: "2.00834" } - quant { - quantized_dimension: 0 - } } operand { name: "Hole" @@ -79,11 +64,6 @@ operand { dim: 2 dim: 1 } - quant { - min: 0 - max: 255 - quantized_dimension: 0 - } } operand { name: "conv2d_transpose" @@ -94,9 +74,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "conv2d_transpose/input_sizes" @@ -111,9 +88,6 @@ operand { arg: "4" arg: "1" } - quant { - quantized_dimension: 0 - } } operation { type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe index babf5af..1b329ba 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe @@ -12,9 +12,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3" @@ -25,9 +22,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_add_param" @@ -39,9 
+33,6 @@ operand { tag: "explicit" arg: "-2.04724" } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_mul_0" @@ -52,9 +43,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "FusedBatchNormV3_mul_0_param" @@ -66,9 +54,6 @@ operand { tag: "explicit" arg: "2.00834" } - quant { - quantized_dimension: 0 - } } operand { name: "Hole" @@ -79,11 +64,6 @@ operand { dim: 2 dim: 2 } - quant { - min: 0 - max: 255 - quantized_dimension: 0 - } } operand { name: "conv2d_transpose" @@ -94,9 +74,6 @@ operand { dim: 4 dim: 1 } - quant { - quantized_dimension: 0 - } } operand { name: "conv2d_transpose/input_sizes" @@ -111,9 +88,6 @@ operand { arg: "4" arg: "1" } - quant { - quantized_dimension: 0 - } } operation { type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe index e40fe4f..a8af8e4 100644 --- a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe @@ -8,10 +8,6 @@ operand { dim: 1 dim: 2 } - quant { - quantized_dimension: 0 - } - is_variable: false } operand { name: "conv2d_transpose/input_sizes" @@ -26,10 +22,6 @@ operand { arg: "1" arg: "2" } - quant { - quantized_dimension: 0 - } - is_variable: false } operand { name: "FusedBatchNormV3" @@ -42,10 +34,6 @@ operand { arg: "-2.04724" arg: "-7.80109" } - quant { - quantized_dimension: 0 - } - is_variable: false } operand { name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes" @@ -61,10 +49,6 @@ operand { arg: "0.0" arg: "0.1" } - quant { - quantized_dimension: 0 - } - is_variable: false } operand { name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2" @@ -75,10 +59,6 @@ operand { dim: 1 dim: 2 } - quant { - quantized_dimension: 0 - } - is_variable: false } operand { name: "FusedBatchNormV3_mul_0" @@ -89,9 +69,6 @@ operand { dim: 1 dim: 2 } - quant { - quantized_dimension: 0 - } } 
operand { name: "FusedBatchNormV3_mul_0_param" @@ -104,9 +81,6 @@ operand { arg: "2.00834" arg: "1.00344" } - quant { - quantized_dimension: 0 - } } operand { name: "Relu6" @@ -117,10 +91,6 @@ operand { dim: 1 dim: 2 } - quant { - quantized_dimension: 0 - } - is_variable: false } operation { type: "TransposeConv" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe new file mode 100644 index 0000000..c28e508 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe @@ -0,0 +1,135 @@ +operand { + name: "Const_transposed" + type: FLOAT32 + shape { + dim: 1 + dim: 3 + dim: 3 + dim: 2 + } + filler { + tag: "gaussian" + arg: "0.0" + arg: "0.1" + } +} +operand { + name: "Output" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 1 + } +} +operand { + name: "FusedBatchNormV3_add_param" + type: FLOAT32 + shape { + dim: 1 + } + filler { + tag: "explicit" + arg: "-2.04724" + } +} +operand { + name: "FusedBatchNormV3_mul_0" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 1 + } +} +operand { + name: "FusedBatchNormV3_mul_0_param" + type: FLOAT32 + shape { + dim: 1 + } + filler { + tag: "explicit" + arg: "2.00834" + } +} +operand { + name: "Input" + type: FLOAT32 + shape { + dim: 1 + dim: 2 + dim: 2 + dim: 2 + } +} +operand { + name: "conv2d_transpose" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 1 + } +} +operand { + name: "conv2d_transpose/input_sizes" + type: INT32 + shape { + dim: 4 + } + filler { + tag: "explicit" + arg: "1" + arg: "4" + arg: "4" + arg: "1" + } +} +operand { + name: "conv2d_transpose/bias" + type: FLOAT32 + shape { + dim: 1 + } + filler { + tag: "explicit" + arg: "1.03" + } +} +operation { + type: "TransposeConv" + input: "conv2d_transpose/input_sizes" + input: "Const_transposed" + input: "Input" + input: "conv2d_transpose/bias" + output: "conv2d_transpose" + transpose_conv_options { + padding: VALID + stride_w: 1 + stride_h: 1 + } 
+} +operation { + type: "Mul" + input: "conv2d_transpose" + input: "FusedBatchNormV3_mul_0_param" + output: "FusedBatchNormV3_mul_0" + mul_options { + activation: NONE + } +} +operation { + type: "Add" + input: "FusedBatchNormV3_mul_0" + input: "FusedBatchNormV3_add_param" + output: "Output" + add_options { + activation: NONE + } +} +input: "Input" +output: "Output" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule new file mode 100644 index 0000000..0988ecf --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule @@ -0,0 +1,7 @@ +# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1 +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "NO_ADD" $(op_count ADD) '=' 0 diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe new file mode 100644 index 0000000..b75527a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe @@ -0,0 +1,149 @@ +operand { + name: "conv2d_transpose/input_sizes" + type: INT32 + shape { + dim: 4 + } + filler { + tag: "explicit" + arg: "1" + arg: "4" + arg: "4" + arg: "16" + } +} +operand { + name: "Const_transposed" + type: FLOAT32 + shape { + dim: 16 + dim: 3 + dim: 3 + dim: 2 + } + filler { + tag: "gaussian" + arg: "0.0" + arg: "0.1" + } +} +operand { + name: "Input" + type: FLOAT32 + shape { + dim: 1 + dim: 2 + dim: 2 + dim: 2 + } +} +operand { + name: "conv2d_transpose/bias" + type: FLOAT32 + shape { + dim: 1 + dim: 1 + dim: 1 + dim: 16 + } + filler { + tag: "gaussian" + arg: "0.0" + arg: "0.1" + } +} +operand { + name: "conv2d_transpose" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 16 + } +} +operation { + type: "TransposeConv" + input: "conv2d_transpose/input_sizes" + input: "Const_transposed" + input: "Input" + input: 
"conv2d_transpose/bias" + output: "conv2d_transpose" + transpose_conv_options { + padding: VALID + stride_w: 1 + stride_h: 1 + } +} + +operand { + name: "FusedBatchNormV3_mul_0" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 16 + } +} +operand { + name: "FusedBatchNormV3_mul_0_param" + type: FLOAT32 + shape { + dim: 1 + dim: 1 + dim: 1 + dim: 16 + } + filler { + tag: "gaussian" + arg: "0.0" + arg: "0.1" + } +} +operation { + type: "Mul" + input: "conv2d_transpose" + input: "FusedBatchNormV3_mul_0_param" + output: "FusedBatchNormV3_mul_0" + mul_options { + activation: NONE + } +} + +operand { + name: "FusedBatchNormV3_add_param" + type: FLOAT32 + shape { + dim: 1 + dim: 1 + dim: 1 + dim: 16 + } + filler { + tag: "gaussian" + arg: "0.0" + arg: "0.1" + } +} +operand { + name: "Output" + type: FLOAT32 + shape { + dim: 1 + dim: 4 + dim: 4 + dim: 16 + } +} +operation { + type: "Add" + input: "FusedBatchNormV3_mul_0" + input: "FusedBatchNormV3_add_param" + output: "Output" + add_options { + activation: NONE + } +} +input: "Input" +output: "Output" diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule new file mode 100644 index 0000000..0988ecf --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule @@ -0,0 +1,7 @@ +# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1 +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "NO_ADD" $(op_count ADD) '=' 0 diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + 
"granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe new file mode 100644 index 0000000..0ae4862 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Add" + input: "ifm1" + input: "ifm2" + output: "ofm" + add_options { + activation: NONE + } +} +input: "ifm1" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule new file mode 100644 index 0000000..b51f4eb --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule @@ -0,0 +1,12 @@ +# To check mixed quantization. +# Default dtype: U8, Add dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8 +RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16 +RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16 +RULE "ADD_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe new file mode 100644 index 0000000..0ae4862 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Add" + input: "ifm1" + input: "ifm2" + output: "ofm" + add_options { + activation: NONE + } +} +input: "ifm1" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule new file mode 100644 index 0000000..96a2535 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule @@ -0,0 +1,12 @@ +# To check mixed quantization. +# Default dtype: S16, Add dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16 +RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8 +RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8 +RULE "ADD_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe new file mode 100644 index 0000000..746c343 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe @@ -0,0 +1,24 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 7 dim: 7 dim: 1 } +} +operation { + type: "AveragePool2D" + averagepool2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + filter_width: 2 + filter_height: 2 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. 
+# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe new file mode 100644 index 0000000..746c343 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe @@ -0,0 +1,24 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 7 dim: 7 dim: 1 } +} +operation { + type: "AveragePool2D" + averagepool2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + filter_width: 2 + filter_height: 2 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe new file mode 100644 index 0000000..2f2e91a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe @@ -0,0 +1,28 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 3 dim: 4 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 4 } +} +operation { + type: "BatchMatMul" + input: "ifm1" + input: "ifm2" + output: "ofm" + batch_matmul_options { + adj_x: false + adj_y: false + } +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule 
b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule new file mode 100644 index 0000000..e832ac5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8 +RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16 +RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8 +RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe new file mode 100644 index 0000000..2f2e91a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe @@ -0,0 +1,28 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 3 dim: 4 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 4 } +} +operation { + type: "BatchMatMul" + input: "ifm1" + input: "ifm2" + output: "ofm" + batch_matmul_options { + adj_x: false + adj_y: false + } +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git 
a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule new file mode 100644 index 0000000..2483377 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16 +RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8 +RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16 +RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe new file mode 100644 index 0000000..35641bd --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe @@ -0,0 +1,28 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 1 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 
dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Concatenation" + concatenation_options { + axis: 3 + activation: NONE + } + input: "ifm1" + input: "ifm2" + output: "ofm" +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule new file mode 100644 index 0000000..e832ac5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8 +RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16 +RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8 +RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe new file mode 100644 index 0000000..35641bd --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe @@ 
-0,0 +1,28 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 1 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Concatenation" + concatenation_options { + axis: 3 + activation: NONE + } + input: "ifm1" + input: "ifm2" + output: "ofm" +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule new file mode 100644 index 0000000..2483377 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16 +RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8 +RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16 +RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe new file mode 100644 index 0000000..8a9328b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 64 dim: 64 dim: 32 } +} +operand { + name: "filter" + type: FLOAT32 + shape { dim: 64 dim: 1 dim: 1 dim: 32 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 64 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 32 dim: 32 dim: 64 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 2 + stride_h: 2 + } + input: "ifm" + input: "filter" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule new file mode 100644 index 0000000..f7af083 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule @@ -0,0 +1,10 @@ +# To check float32 input. +# Input is float32, Conv is uint8. Quantize Op is inserted at the beginning. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32 +RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8 +RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8 +RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe new file mode 100644 index 0000000..8a9328b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 64 dim: 64 dim: 32 } +} +operand { + name: "filter" + type: FLOAT32 + shape { dim: 64 dim: 1 dim: 1 dim: 32 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 64 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 32 dim: 32 dim: 64 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 2 + stride_h: 2 + } + input: "ifm" + input: "filter" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule new file mode 100644 index 0000000..a3f52f2 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule @@ -0,0 +1,11 @@ +# To check float32 output. +# Output is float32, Conv is uint8. Dequantize Op is inserted at the end. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +# Update tensor name (ofm_Dequantize) if 'create_dequantize' function is changed. 
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32 +RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8 +RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8 +RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32 +RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe new file mode 100644 index 0000000..8a9328b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 64 dim: 64 dim: 32 } +} +operand { + name: "filter" + type: FLOAT32 + shape { dim: 64 dim: 1 dim: 1 dim: 32 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 64 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 32 dim: 32 dim: 64 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 2 + stride_h: 2 + } + input: "ifm" + input: "filter" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule new file mode 100644 index 0000000..2187895 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule @@ -0,0 +1,13 @@ +# To check float32 input/output. +# Input/Output is float32, Conv is uint8. +# Quantize Op is inserted at the beginning, Dequantize Op is inserted at the end. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32 +RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32 +RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8 +RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8 +RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1 +RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe new file mode 100644 index 0000000..9cf8a0f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 dim: 2 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 1 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 1 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + } + input: "ifm" + input: "ker" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule 
b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule new file mode 100644 index 0000000..50f235a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "KER_S16" $(tensor_dtype ker) '=' INT16 +RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe new file mode 100644 index 0000000..9cf8a0f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 dim: 2 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 1 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 1 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + } + input: "ifm" + input: 
"ker" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule new file mode 100644 index 0000000..ffa3bc9 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "KER_U8" $(tensor_dtype ker) '=' UINT8 +RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe new file mode 100644 index 0000000..148256a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe @@ -0,0 +1,49 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 2 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 1 dim: 2 dim: 2 dim: 4 } + filler { + tag: "explicit" 
+ arg: "1" arg: "2" arg: "3" arg: "4" + arg: "-9" arg: "10" arg: "-11" arg: "12" + arg: "5" arg: "6" arg: "7" arg: "8" + arg: "13" arg: "-14" arg: "15" arg: "-16" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "3" arg: "4" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 2 dim: 1 dim: 4 } +} +operation { + type: "DepthwiseConv2D" + depthwiseconv2d_options { + padding: VALID + stride_w: 1 + stride_h: 2 + dilation_w_factor: 1 + dilation_h_factor: 1 + depth_multiplier: 2 + activation : RELU + } + input: "ifm" + input: "ker" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule new file mode 100644 index 0000000..50f235a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "KER_S16" $(tensor_dtype ker) '=' INT16 +RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe new file mode 100644 index 0000000..148256a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe @@ -0,0 +1,49 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 2 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 1 dim: 2 dim: 2 dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "3" arg: "4" + arg: "-9" arg: "10" arg: "-11" arg: "12" + arg: "5" arg: "6" arg: "7" arg: "8" + arg: "13" arg: "-14" arg: "15" arg: "-16" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "3" arg: "4" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 2 dim: 1 dim: 4 } +} +operation { + type: "DepthwiseConv2D" + depthwiseconv2d_options { + padding: VALID + stride_w: 1 + stride_h: 2 + dilation_w_factor: 1 + dilation_h_factor: 1 + depth_multiplier: 2 + activation : RELU + } + input: "ifm" + input: "ker" 
+ input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule new file mode 100644 index 0000000..ffa3bc9 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "KER_U8" $(tensor_dtype ker) '=' UINT8 +RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json new file mode 100644 index 0000000..ad2bad6 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "out", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe new file mode 100644 index 0000000..0ecb561 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe @@ -0,0 +1,55 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 1 dim: 16 } +} +operand { + name: "weight" + type: FLOAT32 + shape { dim: 4 dim: 16 } + filler 
{ + tag: "explicit" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "-2" arg: "-3" arg: "4" + } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 1 dim: 4 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + } + input: "in" + input: "weight" + input: "bias" + output: "out" +} +input: "in" +output: "out" diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule new file mode 100644 index 0000000..f542560 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IN_U8" $(tensor_dtype in) '=' UINT8 +RULE "IN_QUANTIZE_S16" $(tensor_dtype in_Quantize) '=' INT16 +RULE "WEIGHT_S16" $(tensor_dtype weight) '=' INT16 +RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64 +RULE "TARGET_S16" $(tensor_dtype out) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json new file mode 100644 index 0000000..ff3eb97 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "out", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe new file mode 100644 index 0000000..0ecb561 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe @@ -0,0 +1,55 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 1 dim: 16 } +} +operand { + name: "weight" + type: FLOAT32 + shape { dim: 4 dim: 16 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + } +} 
+operand { + name: "bias" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "-2" arg: "-3" arg: "4" + } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 1 dim: 4 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + } + input: "in" + input: "weight" + input: "bias" + output: "out" +} +input: "in" +output: "out" diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule new file mode 100644 index 0000000..4acd229 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IN_S16" $(tensor_dtype in) '=' INT16 +RULE "IN_QUANTIZE_U8" $(tensor_dtype in_Quantize) '=' UINT8 +RULE "WEIGHT_U8" $(tensor_dtype weight) '=' UINT8 +RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32 +RULE "TARGET_U8" $(tensor_dtype out) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe 
b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe new file mode 100644 index 0000000..836a373 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "LeakyRelu" + leaky_relu_options { + alpha: 2.0 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe new file mode 100644 index 0000000..836a373 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "LeakyRelu" + leaky_relu_options { + alpha: 2.0 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe new file mode 100644 index 0000000..dca24da --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Logistic" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe new file mode 100644 index 0000000..dca24da --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Logistic" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe new file mode 100644 index 0000000..718630f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe @@ -0,0 +1,24 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 7 dim: 7 dim: 1 } +} +operation { + type: "MaxPool2D" + maxpool2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + filter_width: 2 + filter_height: 2 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. 
+# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe new file mode 100644 index 0000000..718630f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe @@ -0,0 +1,24 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 7 dim: 7 dim: 1 } +} +operation { + type: "MaxPool2D" + maxpool2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + filter_width: 2 + filter_height: 2 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed 
quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe new file mode 100644 index 0000000..d383997 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operation { + type: "Mean" + mean_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule @@ -0,0 +1,11 @@ 
+# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe new file mode 100644 index 0000000..d383997 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } +} +operation { + type: "Mean" + mean_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe new file mode 100644 index 0000000..43ca30d --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Mul" + input: "ifm1" + input: "ifm2" + output: "ofm" + mul_options { + activation: NONE + } +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule new file mode 100644 index 0000000..e832ac5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. 
+# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8 +RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16 +RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8 +RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe new file mode 100644 index 0000000..43ca30d --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "Mul" + input: "ifm1" + input: "ifm2" + output: "ofm" + mul_options { + activation: NONE + } +} +input: "ifm1" +input: "ifm2" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule new file mode 100644 index 0000000..2483377 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. 
+# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16 +RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8 +RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16 +RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3 diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe new file mode 100644 index 0000000..447e4a1 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Neg" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. 
+# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe new file mode 100644 index 0000000..447e4a1 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Neg" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe new file mode 100644 index 0000000..c18acdb --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "alpha" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 3 } + filler { + tag: "explicit" + arg: "0.1" arg: "0.3" arg: "0.5" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "PRelu" + input: "ifm" + input: "alpha" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule new file mode 100644 index 0000000..8143614 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule @@ -0,0 +1,12 @@ +# To check mixed quantization. 
+# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "ALPHA_S16" $(tensor_dtype alpha) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe new file mode 100644 index 0000000..c18acdb --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "alpha" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 3 } + filler { + tag: "explicit" + arg: "0.1" arg: "0.3" arg: "0.5" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operation { + type: "PRelu" + input: "ifm" + input: "alpha" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule new file mode 100644 index 0000000..5b94160 --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule @@ -0,0 +1,12 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "ALPHA_U8" $(tensor_dtype alpha) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe new file mode 100644 index 0000000..2cc980b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe @@ -0,0 +1,30 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "padding" + type: INT32 + shape { dim: 4 dim: 2 } + filler { + tag: "explicit" + arg: "0" arg: "0" + arg: "1" arg: "1" + arg: "2" arg: "2" + arg: "0" arg: "0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 5 dim: 7 dim: 2 } +} +operation { + type: "Pad" + input: "ifm" + input: "padding" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule 
b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe new file mode 100644 index 0000000..2cc980b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe @@ -0,0 +1,30 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "padding" + type: INT32 + shape { dim: 4 dim: 2 } + filler { + tag: "explicit" + arg: "0" arg: "0" + arg: "1" arg: "1" + arg: "2" arg: "2" + arg: "0" arg: "0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 5 dim: 7 dim: 2 } +} +operation { + type: "Pad" + input: "ifm" + input: "padding" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git 
a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe new file mode 100644 index 0000000..2265935 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU6" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule new file mode 100644 index 0000000..71f381e --- 
/dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe new file mode 100644 index 0000000..2265935 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU6" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. 
+# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe new file mode 100644 index 0000000..8eaa360 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe new file mode 100644 index 0000000..8eaa360 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe new file mode 100644 index 0000000..cdca589 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 dim: 10 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 10 } +} +operation { + type: "Reshape" + reshape_options { + new_shape: 10 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe new file mode 100644 index 0000000..cdca589 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 dim: 10 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 10 } +} +operation { + type: "Reshape" + reshape_options { + new_shape: 10 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe new file mode 100644 index 0000000..3dd4c76 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe @@ -0,0 +1,30 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "size" + type: INT32 + shape { dim: 2 } + filler { + tag: "constant" arg: "16" arg: "16" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 3 } +} +operation { + type: "ResizeBilinear" + input: "ifm1" + input: "size" + output: "ofm" + resize_bilinear_options { + align_corners: false + half_pixel_centers: false + } +} +input: "ifm1" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule new file mode 100644 index 0000000..3a3429d --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm1) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe new file mode 100644 index 0000000..3dd4c76 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe @@ -0,0 +1,30 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "size" + type: INT32 + shape { dim: 2 } + filler { + tag: "constant" arg: "16" arg: "16" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 3 } +} +operation { + type: "ResizeBilinear" + input: "ifm1" + input: "size" + output: "ofm" + resize_bilinear_options { + align_corners: false + half_pixel_centers: false + } +} +input: "ifm1" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git 
a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule new file mode 100644 index 0000000..2c5fcd5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm1) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe new file mode 100644 index 0000000..ef6b964 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 8 } +} +operand { + name: "size" + type: INT32 + shape { dim: 2 } + filler { tag: "explicit" arg: "16" arg: "16" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 8 } +} +operation { + type: "ResizeNearestNeighbor" + resize_nearest_neighbor_options { + align_corners: true + } + input: "ifm" + input: "size" + output: "ofm" +} +input: "ifm" +output: "ofm" 
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe new file mode 100644 index 0000000..ef6b964 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 8 } +} +operand { + name: "size" + type: INT32 + shape { dim: 2 } + filler { tag: "explicit" arg: "16" arg: "16" } +} +operand { + name: "ofm" + type: 
FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 8 } +} +operation { + type: "ResizeNearestNeighbor" + resize_nearest_neighbor_options { + align_corners: true + } + input: "ifm" + input: "size" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe new file mode 100644 index 0000000..2f9ccdd --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe @@ -0,0 +1,37 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 2 dim: 3 } +} +operand { + 
name: "begin" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "0" arg: "0" + } +} +operand { + name: "size" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "1" arg: "3" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 3 } +} +operation { + type: "Slice" + input: "ifm" + input: "begin" + input: "size" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe new file mode 100644 index 0000000..2f9ccdd --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe @@ -0,0 +1,37 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 2 dim: 3 } +} +operand { + name: "begin" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "0" arg: "0" + } +} +operand { + name: "size" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "1" arg: "3" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 3 } +} +operation { + type: "Slice" + input: "ifm" + input: "begin" + input: "size" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe new file mode 100644 index 0000000..ce9abf5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Softmax" + softmax_options { + beta: 0.0 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe new file mode 100644 index 0000000..ce9abf5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe @@ -0,0 +1,20 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Softmax" + softmax_options { + beta: 0.0 + } + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe new file mode 100644 index 0000000..7bdf87d --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Tanh" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe new file mode 100644 index 0000000..7bdf87d --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe @@ -0,0 +1,17 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Tanh" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe new file mode 100644 index 0000000..c281b04 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe @@ -0,0 +1,54 @@ +operand { + name: "out_shape" + type: INT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "4" arg: "4" arg: "3" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "3" + } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 1 dim: 3 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} + +operation { + type: "TransposeConv" + transpose_conv_options { + padding: SAME + stride_w: 1 + stride_h: 1 + } + input: "out_shape" + input: "ker" + input: "ifm" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse 
b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule new file mode 100644 index 0000000..50f235a --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "KER_S16" $(tensor_dtype ker) '=' INT16 +RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe new file mode 100644 index 0000000..c281b04 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe @@ -0,0 +1,54 @@ +operand { + name: "out_shape" + type: INT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "1" arg: "4" arg: "4" arg: "3" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "3" + } +} +operand { + name: "ker" + type: FLOAT32 
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 4 dim: 3 } +} + +operation { + type: "TransposeConv" + transpose_conv_options { + padding: SAME + stride_w: 1 + stride_h: 1 + } + input: "out_shape" + input: "ker" + input: "ifm" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule new file mode 100644 index 0000000..ffa3bc9 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule @@ -0,0 +1,13 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. 
+ +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "KER_U8" $(tensor_dtype ker) '=' UINT8 +RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json new file mode 100644 index 0000000..ab70bcc --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe new file mode 100644 index 0000000..82a85c1 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 8 dim: 1 } +} +operand { + name: "perm" + type: INT32 + shape { dim: 3 } + filler { tag: "explicit" arg: "1" arg: "2" arg: "0" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 8 dim: 1 dim: 3 } +} + +operation { + type: "Transpose" + transpose_options { + } + input: "ifm" + input: "perm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule new file mode 100644 index 0000000..71f381e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule @@ -0,0 +1,11 @@ 
+# To check mixed quantization. +# Default dtype: U8, Target Op dtype: S16 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8 +RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16 +RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json new file mode 100644 index 0000000..010fa65 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json @@ -0,0 +1,11 @@ +{ + "default_quantization_dtype" : "int16", + "default_granularity" : "channel", + "layers" : [ + { + "name" : "ofm", + "dtype" : "uint8", + "granularity" : "channel" + } + ] +} diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe new file mode 100644 index 0000000..82a85c1 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe @@ -0,0 +1,27 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 8 dim: 1 } +} +operand { + name: "perm" + type: INT32 + shape { dim: 3 } + filler { tag: "explicit" arg: "1" arg: "2" arg: "0" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 8 dim: 1 dim: 3 } +} + +operation { + type: "Transpose" + transpose_options { + } + input: "ifm" + input: "perm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule new file mode 100644 index 0000000..b07ac58 --- /dev/null +++ 
b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule @@ -0,0 +1,11 @@ +# To check mixed quantization. +# Default dtype: S16, Target Op dtype: U8 +# Quantize Ops are inserted at the beginning/end of the model. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16 +RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8 +RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8 +RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16 +RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2 diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe new file mode 100644 index 0000000..edc8efd --- /dev/null +++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe @@ -0,0 +1,46 @@ +# +# Failed case from https://github.com/Samsung/ONE/issues/9439 +# +operand { + name: "Placeholder" + type: FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 32 } + is_variable: false +} +operand { + name: "strided_slice/stack_2" + type: INT32 + shape { dim: 4 } + filler { tag: "explicit" arg: "1" arg: "-1" arg: "1" arg: "1" } + is_variable: false +} +operand { + name: "strided_slice/stack" + type: INT32 + shape { dim: 4 } + filler { tag: "explicit" arg: "0" arg: "0" arg: "0" arg: "0" } + is_variable: false +} +operand { + name: "strided_slice" + type: FLOAT32 + shape { dim: 1 dim: 16 dim: 16 dim: 32 } + is_variable: false +} +operation { + type: "StridedSlice" + input: "Placeholder" + input: "strided_slice/stack" + input: "strided_slice/stack" + input: "strided_slice/stack_2" + output: "strided_slice" + strided_slice_options { + begin_mask: 15 + end_mask: 15 + ellipsis_mask: 0 + new_axis_mask: 0 + shrink_axis_mask: 0 + } +} +input: "Placeholder" +output: "strided_slice" diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse new file mode 100644 index 0000000..e69de29 diff --git 
a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py index 8114c50..0cfa27e 100644 --- a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.raw_ops.AddV2(x=lhs_, y=rhs_) diff --git a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py index b9f7a1c..5f851cb 100644 --- a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 5, 4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.raw_ops.BatchMatMulV2(x=lhs_, y=rhs_) diff --git a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py index d28034b..b4f0297 100644 --- a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py +++ b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole") op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True) diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py index 99940bf..995efd5 100644 --- a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as np +tf.compat.v1.disable_eager_execution() + input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32) paddings_ = tf.compat.v1.constant( np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32)) diff --git a/res/TensorFlowPythonExamples/examples/abs/__init__.py b/res/TensorFlowPythonExamples/examples/abs/__init__.py index fd55155..83ac3cb 100755 --- a/res/TensorFlowPythonExamples/examples/abs/__init__.py +++ b/res/TensorFlowPythonExamples/examples/abs/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") abs_ = tf.compat.v1.abs(in_) diff --git a/res/TensorFlowPythonExamples/examples/add/__init__.py b/res/TensorFlowPythonExamples/examples/add/__init__.py index 7e283f3..39790a0 100755 --- a/res/TensorFlowPythonExamples/examples/add/__init__.py +++ b/res/TensorFlowPythonExamples/examples/add/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.add(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/add_n/__init__.py b/res/TensorFlowPythonExamples/examples/add_n/__init__.py index afd068d..c8e23c9 100644 --- a/res/TensorFlowPythonExamples/examples/add_n/__init__.py +++ b/res/TensorFlowPythonExamples/examples/add_n/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") in3_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/argmax/__init__.py 
b/res/TensorFlowPythonExamples/examples/argmax/__init__.py index 059df97..b8791b4 100755 --- a/res/TensorFlowPythonExamples/examples/argmax/__init__.py +++ b/res/TensorFlowPythonExamples/examples/argmax/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.math.argmax(in_) diff --git a/res/TensorFlowPythonExamples/examples/argmin/__init__.py b/res/TensorFlowPythonExamples/examples/argmin/__init__.py index f9a5462..39f3278 100644 --- a/res/TensorFlowPythonExamples/examples/argmin/__init__.py +++ b/res/TensorFlowPythonExamples/examples/argmin/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.math.argmin(in_) diff --git a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py index 90756b0..c430749 100644 --- a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py +++ b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as np +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole") filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32) diff --git a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py index a8ab0dd..814cf57 100644 --- a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py +++ b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole") op_ = tf.compat.v1.nn.avg_pool2d(in_, (2, 2), 1, 
"VALID") diff --git a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py index e865552..4a77870 100644 --- a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py +++ b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + mean = tf.compat.v1.constant([1., 2., 3.]) variance = tf.compat.v1.constant([4., 5., 6.]) offset = tf.compat.v1.constant([7., 8., 9.]) diff --git a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py index 1dd08b0..9efa85c 100644 --- a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py +++ b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=[4, 1, 1, 1], name="Hole") cr_ = tf.constant([[0, 0], [0, 0]], name="Hole") op_ = tf.batch_to_space(in_, cr_, 2) diff --git a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py index eb8a69b..72ffe10 100755 --- a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py +++ b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1, 2, 3), name="Hole") op_ = tf.nn.bias_add(in_, bias=[1.0, 1.0, -1.0], data_format="NHWC") diff --git a/res/TensorFlowPythonExamples/examples/cast/__init__.py b/res/TensorFlowPythonExamples/examples/cast/__init__.py index 4c0adc0..5919e0d 100644 --- a/res/TensorFlowPythonExamples/examples/cast/__init__.py +++ b/res/TensorFlowPythonExamples/examples/cast/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf 
+tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") cast_ = tf.cast(in_, tf.int32) diff --git a/res/TensorFlowPythonExamples/examples/ceil/__init__.py b/res/TensorFlowPythonExamples/examples/ceil/__init__.py index 5178f8f..79737c8 100755 --- a/res/TensorFlowPythonExamples/examples/ceil/__init__.py +++ b/res/TensorFlowPythonExamples/examples/ceil/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.ceil(in_) diff --git a/res/TensorFlowPythonExamples/examples/concat/__init__.py b/res/TensorFlowPythonExamples/examples/concat/__init__.py index ec59b24..c1c7b1a 100644 --- a/res/TensorFlowPythonExamples/examples/concat/__init__.py +++ b/res/TensorFlowPythonExamples/examples/concat/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole1") in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 4), name="Hole2") concat_ = tf.compat.v1.concat([in1_, in2_], axis=-2) diff --git a/res/TensorFlowPythonExamples/examples/cond/__init__.py b/res/TensorFlowPythonExamples/examples/cond/__init__.py index deafbb1..660ec9b 100644 --- a/res/TensorFlowPythonExamples/examples/cond/__init__.py +++ b/res/TensorFlowPythonExamples/examples/cond/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX") y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY") z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ") diff --git a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py index fed1920..da88094 100644 --- a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX") y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY") z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ") diff --git a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py index fa4f72f..7cf8dee 100644 --- a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py +++ b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as np +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole") filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32) diff --git a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py index 680bb36..812fef1 100644 --- a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as np +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole") filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32) diff --git a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py index 17fd6e2..cd317ce 100644 --- a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py +++ b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole") kernel_ = 
tf.compat.v1.placeholder(tf.float32, shape=(3, 3, 1, 1), name="Hole") op_ = tf.compat.v1.nn.conv2d_transpose( diff --git a/res/TensorFlowPythonExamples/examples/cos/__init__.py b/res/TensorFlowPythonExamples/examples/cos/__init__.py index cfce5d8..3271ddb 100755 --- a/res/TensorFlowPythonExamples/examples/cos/__init__.py +++ b/res/TensorFlowPythonExamples/examples/cos/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.cos(in_) diff --git a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py index 0cbc304..c11766e 100644 --- a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py +++ b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 1, 1, 4], name="Hole") op_ = tf.nn.depth_to_space(in_, 2) diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py index 7df1938..a9c8b33 100644 --- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py +++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as np +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole") filters = np.array( diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py index 4800ebd..8fbd0da 100644 --- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf import numpy as 
np +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole") filters = np.array( diff --git a/res/TensorFlowPythonExamples/examples/div/__init__.py b/res/TensorFlowPythonExamples/examples/div/__init__.py index 2887771..9acf916 100755 --- a/res/TensorFlowPythonExamples/examples/div/__init__.py +++ b/res/TensorFlowPythonExamples/examples/div/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.div(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/elu/__init__.py b/res/TensorFlowPythonExamples/examples/elu/__init__.py index b41f651..91c6209 100755 --- a/res/TensorFlowPythonExamples/examples/elu/__init__.py +++ b/res/TensorFlowPythonExamples/examples/elu/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") elu_ = tf.compat.v1.nn.elu(in_) diff --git a/res/TensorFlowPythonExamples/examples/exp/__init__.py b/res/TensorFlowPythonExamples/examples/exp/__init__.py index e836384..5a7c88d 100644 --- a/res/TensorFlowPythonExamples/examples/exp/__init__.py +++ b/res/TensorFlowPythonExamples/examples/exp/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.exp(in_) diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py index ab6a87f..1f99c11 100644 --- a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py +++ b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf 
+tf.compat.v1.disable_eager_execution() + # example 1 where input has all known dims and axis is const in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py index 36c5475..1b1626a 100644 --- a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py +++ b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + # example 2 where input has unknown dim and axis is const in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(None, None), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py index 6304c23..c73b0ba 100644 --- a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py +++ b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + # example 3 where input has all known dim and axis is not const in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/fill/__init__.py b/res/TensorFlowPythonExamples/examples/fill/__init__.py index f8413bb..1c9d204 100644 --- a/res/TensorFlowPythonExamples/examples/fill/__init__.py +++ b/res/TensorFlowPythonExamples/examples/fill/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(), name="Hole") op_ = tf.compat.v1.fill((3, 4), in_) diff --git a/res/TensorFlowPythonExamples/examples/flatten/__init__.py b/res/TensorFlowPythonExamples/examples/flatten/__init__.py index bb6dbaa..3f13568 100644 --- a/res/TensorFlowPythonExamples/examples/flatten/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/flatten/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole") op_ = tf.compat.v1.layers.flatten(in_) diff --git a/res/TensorFlowPythonExamples/examples/floor/__init__.py b/res/TensorFlowPythonExamples/examples/floor/__init__.py index 3b3f5bf..0357cee 100755 --- a/res/TensorFlowPythonExamples/examples/floor/__init__.py +++ b/res/TensorFlowPythonExamples/examples/floor/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.floor(in_) diff --git a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py index 34f413f..5714bf5 100755 --- a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py +++ b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.floordiv(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/floormod/__init__.py b/res/TensorFlowPythonExamples/examples/floormod/__init__.py index c06e2a9..f4e1a5f 100644 --- a/res/TensorFlowPythonExamples/examples/floormod/__init__.py +++ b/res/TensorFlowPythonExamples/examples/floormod/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.floormod(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py 
b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py index 5e13b0d..628420c 100644 --- a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py +++ b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + scale = tf.compat.v1.constant([1., 2., 3.]) offset = tf.compat.v1.constant([4., 5., 6.]) mean = tf.constant([1., 2., 3.]) diff --git a/res/TensorFlowPythonExamples/examples/gather/__init__.py b/res/TensorFlowPythonExamples/examples/gather/__init__.py index 173be4a..67b4d07 100644 --- a/res/TensorFlowPythonExamples/examples/gather/__init__.py +++ b/res/TensorFlowPythonExamples/examples/gather/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 2, 3, 4), name="Hole") indices_ = tf.constant([1, 2]) op_ = tf.gather(param_, indices_, axis=2) diff --git a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py index 1ff11d5..8c0df36 100644 --- a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py +++ b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 2), name="Hole") indices_ = tf.constant([[0, 1], [1, 0]]) op_ = tf.gather_nd(param_, indices_) diff --git a/res/TensorFlowPythonExamples/examples/greater/__init__.py b/res/TensorFlowPythonExamples/examples/greater/__init__.py index e88f574..b8578e3 100755 --- a/res/TensorFlowPythonExamples/examples/greater/__init__.py +++ b/res/TensorFlowPythonExamples/examples/greater/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = 
tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.greater(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py index b15fbd3..cf10e4d 100755 --- a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py +++ b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.greater_equal(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/gru/__init__.py b/res/TensorFlowPythonExamples/examples/gru/__init__.py index 26ee75d..0d47189 100755 --- a/res/TensorFlowPythonExamples/examples/gru/__init__.py +++ b/res/TensorFlowPythonExamples/examples/gru/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf from tensorflow import keras +tf.compat.v1.disable_eager_execution() + model = keras.Sequential() shape = (4, 4) model.add(keras.layers.GRU(2, input_shape=shape)) diff --git a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py index b44942c..62a774e 100644 --- a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py +++ b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + sess = tf.Session() in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py index 0dda6bf..fe26e06 100644 --- a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py +++ b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py @@ -1,4 +1,6 @@ 
import tensorflow as tf +tf.compat.v1.disable_eager_execution() + arg = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.math.l2_normalize(arg) diff --git a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py index d595edb..c1899de 100755 --- a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py +++ b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.nn.leaky_relu(in_) diff --git a/res/TensorFlowPythonExamples/examples/less/__init__.py b/res/TensorFlowPythonExamples/examples/less/__init__.py index 41ba18c..6fee74a 100755 --- a/res/TensorFlowPythonExamples/examples/less/__init__.py +++ b/res/TensorFlowPythonExamples/examples/less/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.less(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py index d60bf2a..fdca649 100755 --- a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py +++ b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.less_equal(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py index eca6b22..c358bd0 
100644 --- a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py +++ b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 20), name="Hole") op_ = tf.compat.v1.nn.lrn(x_, 5, 1.0, 1.0, 0.5) diff --git a/res/TensorFlowPythonExamples/examples/log/__init__.py b/res/TensorFlowPythonExamples/examples/log/__init__.py index cb206c0..d8787ef 100644 --- a/res/TensorFlowPythonExamples/examples/log/__init__.py +++ b/res/TensorFlowPythonExamples/examples/log/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.math.log(in_) diff --git a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py index 651888c..a13f211 100644 --- a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py +++ b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.nn.log_softmax(in_) diff --git a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py index c3d4589..856ebd9 100644 --- a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole") op_ = tf.compat.v1.nn.log_softmax(in_, axis=1) diff --git a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py 
b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py index f546fae..d0c4ea2 100755 --- a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py +++ b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole") op_ = tf.compat.v1.logical_and(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py index f1bcc2c..532d5ff 100755 --- a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py +++ b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole") op_ = tf.compat.v1.logical_not(in_) diff --git a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py index 991d61a..ce584ea 100755 --- a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py +++ b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole") op_ = tf.compat.v1.logical_or(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/lstm/__init__.py b/res/TensorFlowPythonExamples/examples/lstm/__init__.py index c07948b..99ef3c2 100755 --- a/res/TensorFlowPythonExamples/examples/lstm/__init__.py +++ b/res/TensorFlowPythonExamples/examples/lstm/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf from tensorflow import keras +tf.compat.v1.disable_eager_execution() + model = keras.Sequential() shape = (4, 
4) model.add(keras.layers.LSTM(2, input_shape=shape)) diff --git a/res/TensorFlowPythonExamples/examples/matmul/__init__.py b/res/TensorFlowPythonExamples/examples/matmul/__init__.py index 760241d..6f049e5 100755 --- a/res/TensorFlowPythonExamples/examples/matmul/__init__.py +++ b/res/TensorFlowPythonExamples/examples/matmul/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 4), name="Hole") rhs_ = tf.compat.v1.constant(dtype=tf.float32, shape=(4, 4), name="Hole", value=1.0) op_ = tf.compat.v1.matmul(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py index 43d4d87..a708f35 100644 --- a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py +++ b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.matrix_band_part(in_, 1, -1) diff --git a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py index 384a298..cd789ea 100644 --- a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py +++ b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.matrix_diag(in_) diff --git a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py index e8878f0..55b8690 100644 --- a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py +++ b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py @@ -1,5 +1,7 @@ import 
tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole") diag_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole") op_ = tf.compat.v1.matrix_set_diag(in_, diag_) diff --git a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py index 487858c..78daa03 100755 --- a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py +++ b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 1), name="Hole") op_ = tf.compat.v1.nn.max_pool_with_argmax( in_, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="VALID") diff --git a/res/TensorFlowPythonExamples/examples/maximum/__init__.py b/res/TensorFlowPythonExamples/examples/maximum/__init__.py index a96fe03..0656ba4 100755 --- a/res/TensorFlowPythonExamples/examples/maximum/__init__.py +++ b/res/TensorFlowPythonExamples/examples/maximum/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.maximum(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/minimum/__init__.py b/res/TensorFlowPythonExamples/examples/minimum/__init__.py index ef664db..ebd795e 100755 --- a/res/TensorFlowPythonExamples/examples/minimum/__init__.py +++ b/res/TensorFlowPythonExamples/examples/minimum/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.minimum(lhs_, 
rhs_) diff --git a/res/TensorFlowPythonExamples/examples/multiply/__init__.py b/res/TensorFlowPythonExamples/examples/multiply/__init__.py index da88856..68dff1e 100755 --- a/res/TensorFlowPythonExamples/examples/multiply/__init__.py +++ b/res/TensorFlowPythonExamples/examples/multiply/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.multiply(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/negative/__init__.py b/res/TensorFlowPythonExamples/examples/negative/__init__.py index 86713da..473dc9b 100644 --- a/res/TensorFlowPythonExamples/examples/negative/__init__.py +++ b/res/TensorFlowPythonExamples/examples/negative/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") neg_ = tf.math.negative(in_) diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py index b8f010c..2598b53 100644 --- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py +++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + max_output_size = tf.compat.v1.constant(4) in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py index 42e7bf0..932ad35 100644 --- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py 
@@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + max_output_size = tf.compat.v1.constant(6) iou_threshold = tf.compat.v1.constant(0.5) score_threshold = tf.compat.v1.constant(0.6) diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py index 32c6173..c251b92 100644 --- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py +++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + max_output_size = tf.compat.v1.constant(4) in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py index 415f920..a7185c3 100644 --- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + max_output_size = tf.compat.v1.constant(6) iou_threshold = tf.compat.v1.constant(0.5) score_threshold = tf.compat.v1.constant(0.6) diff --git a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py index 95073fe..955eb1f 100755 --- a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py +++ b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.not_equal(lhs_, rhs_) diff --git 
a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py index 49e0346..b99bb9c 100644 --- a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py +++ b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + indice_ = tf.compat.v1.placeholder(tf.int32, shape=(1, 2, 3, 4), name='Hole') depth_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole') on_value_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole') diff --git a/res/TensorFlowPythonExamples/examples/pack/__init__.py b/res/TensorFlowPythonExamples/examples/pack/__init__.py index 609bc9b..4f1c46b 100755 --- a/res/TensorFlowPythonExamples/examples/pack/__init__.py +++ b/res/TensorFlowPythonExamples/examples/pack/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_1 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole") in_2 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole") op_ = tf.compat.v1.stack([in_1, in_2]) diff --git a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py index dc877f1..a78e215 100644 --- a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py +++ b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole") paddings_ = tf.constant([[1, 1], [2, 2]], name="Hole") op_ = tf.pad(tensor_, paddings_, "REFLECT") diff --git a/res/TensorFlowPythonExamples/examples/pad/__init__.py b/res/TensorFlowPythonExamples/examples/pad/__init__.py index ac5cf81..7097b75 100755 --- a/res/TensorFlowPythonExamples/examples/pad/__init__.py +++ b/res/TensorFlowPythonExamples/examples/pad/__init__.py @@ -1,5 +1,7 @@ import 
tensorflow as tf +tf.compat.v1.disable_eager_execution() + tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole") paddings_ = tf.compat.v1.constant([[1, 1], [2, 2]], name="Hole") op_ = tf.compat.v1.pad(tensor_, paddings_) diff --git a/res/TensorFlowPythonExamples/examples/pow/__init__.py b/res/TensorFlowPythonExamples/examples/pow/__init__.py index 960032a..12a19f2 100755 --- a/res/TensorFlowPythonExamples/examples/pow/__init__.py +++ b/res/TensorFlowPythonExamples/examples/pow/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.pow(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/prelu/__init__.py b/res/TensorFlowPythonExamples/examples/prelu/__init__.py index b0e7c7b..7e43f51 100644 --- a/res/TensorFlowPythonExamples/examples/prelu/__init__.py +++ b/res/TensorFlowPythonExamples/examples/prelu/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + input_tensor = tf.compat.v1.placeholder( dtype=tf.float32, name="input", shape=[1, 4, 4, 3]) prelu = tf.keras.layers.PReLU(shared_axes=[1, 2]) diff --git a/res/TensorFlowPythonExamples/examples/range/__init__.py b/res/TensorFlowPythonExamples/examples/range/__init__.py index 0f032e9..9b57167 100644 --- a/res/TensorFlowPythonExamples/examples/range/__init__.py +++ b/res/TensorFlowPythonExamples/examples/range/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + # this modified example comes from TF API reference start = 1 limit = 10 diff --git a/res/TensorFlowPythonExamples/examples/rank/__init__.py b/res/TensorFlowPythonExamples/examples/rank/__init__.py index c9b9707..ab2bc79 100644 --- a/res/TensorFlowPythonExamples/examples/rank/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/rank/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 3, 3), name="Hole") rank_ = tf.compat.v1.rank(in_) diff --git a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py index eb9167f..2fee752 100644 --- a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + input_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 4), name="Hole") op_ = tf.compat.v1.reduce_all(input_, axis=1, keepdims=False) diff --git a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py index f87c251..0e87a0c 100644 --- a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 2), name="Hole") op_ = tf.compat.v1.math.reduce_any(in_) diff --git a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py index 27e48df..dc5e0d6 100644 --- a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole") op_ = tf.compat.v1.math.reduce_max(in_) diff --git a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py index b3cf034..fe81336 100644 --- 
a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole") op_ = tf.compat.v1.math.reduce_min(in_) diff --git a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py index 4d134ae..9fe2ee2 100644 --- a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole") op_ = tf.compat.v1.math.reduce_prod(in_) diff --git a/res/TensorFlowPythonExamples/examples/relu/__init__.py b/res/TensorFlowPythonExamples/examples/relu/__init__.py index a144a12..69e0753 100755 --- a/res/TensorFlowPythonExamples/examples/relu/__init__.py +++ b/res/TensorFlowPythonExamples/examples/relu/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.nn.relu(in_) diff --git a/res/TensorFlowPythonExamples/examples/relu6/__init__.py b/res/TensorFlowPythonExamples/examples/relu6/__init__.py index f58ae7c..d581d39 100755 --- a/res/TensorFlowPythonExamples/examples/relu6/__init__.py +++ b/res/TensorFlowPythonExamples/examples/relu6/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.nn.relu6(in_) diff --git a/res/TensorFlowPythonExamples/examples/reshape/__init__.py b/res/TensorFlowPythonExamples/examples/reshape/__init__.py index f451bac..c60c0a6 100644 --- a/res/TensorFlowPythonExamples/examples/reshape/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/reshape/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.reshape(in_, shape=[2, 2, 2, 2]) diff --git a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py index 422bf1d..773fc07 100755 --- a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py +++ b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole") op_ = tf.compat.v1.image.resize_bilinear(in_, [16, 16]) diff --git a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py index a140229..3e688d3 100755 --- a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py +++ b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole") op_ = tf.compat.v1.image.resize_nearest_neighbor(in_, [16, 16]) diff --git a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py index aebd4fc..4b7a9cf 100755 --- a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 8), name="Hole") op_ = tf.compat.v1.reverse_sequence(in_, [7, 2, 3, 5], seq_axis=1, batch_axis=0) diff --git 
a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py index e6afc99..0404cd6 100755 --- a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole") op_ = tf.compat.v1.reverse_v2(in_, [3, 2]) diff --git a/res/TensorFlowPythonExamples/examples/rnn/__init__.py b/res/TensorFlowPythonExamples/examples/rnn/__init__.py index 5e76951..9c1e69c 100755 --- a/res/TensorFlowPythonExamples/examples/rnn/__init__.py +++ b/res/TensorFlowPythonExamples/examples/rnn/__init__.py @@ -1,6 +1,8 @@ import tensorflow as tf from tensorflow import keras +tf.compat.v1.disable_eager_execution() + model = keras.Sequential() shape = (4, 4) model.add(keras.layers.SimpleRNN(2, input_shape=shape)) diff --git a/res/TensorFlowPythonExamples/examples/round/__init__.py b/res/TensorFlowPythonExamples/examples/round/__init__.py index 9a00ad5..6cda033 100755 --- a/res/TensorFlowPythonExamples/examples/round/__init__.py +++ b/res/TensorFlowPythonExamples/examples/round/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.round(in_) diff --git a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py index 90500bd..dc81e48 100755 --- a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py +++ b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.rsqrt(in_) diff --git a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py 
b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py index e094b57..0158e3c 100644 --- a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py +++ b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py @@ -2,6 +2,8 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + indices = tf.compat.v1.constant([[0], [2]]) updates = tf.compat.v1.constant([[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, diff --git a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py index 24d15bb..c15746a 100755 --- a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py +++ b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(4, ), name="Hole") op_ = tf.compat.v1.math.segment_sum(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/shape/__init__.py b/res/TensorFlowPythonExamples/examples/shape/__init__.py index 4c13a33..b719eb9 100644 --- a/res/TensorFlowPythonExamples/examples/shape/__init__.py +++ b/res/TensorFlowPythonExamples/examples/shape/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 2, 3), name="Hole") op_ = tf.compat.v1.shape(in_) diff --git a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py index 43328f2..1749071 100755 --- a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py +++ b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), 
name="Hole") op_ = tf.compat.v1.nn.sigmoid(in_) diff --git a/res/TensorFlowPythonExamples/examples/sin/__init__.py b/res/TensorFlowPythonExamples/examples/sin/__init__.py index 0bfdcff..75ea73b 100644 --- a/res/TensorFlowPythonExamples/examples/sin/__init__.py +++ b/res/TensorFlowPythonExamples/examples/sin/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.sin(in_) diff --git a/res/TensorFlowPythonExamples/examples/slice/__init__.py b/res/TensorFlowPythonExamples/examples/slice/__init__.py index 45f9044..b734dc2 100644 --- a/res/TensorFlowPythonExamples/examples/slice/__init__.py +++ b/res/TensorFlowPythonExamples/examples/slice/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole") op_ = tf.compat.v1.slice(in_, [1, 0, 0], [1, 1, 3]) diff --git a/res/TensorFlowPythonExamples/examples/softmax/__init__.py b/res/TensorFlowPythonExamples/examples/softmax/__init__.py index 5b8d1cd..3c93e8a 100755 --- a/res/TensorFlowPythonExamples/examples/softmax/__init__.py +++ b/res/TensorFlowPythonExamples/examples/softmax/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.nn.softmax(in_) diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py index e088012..b0e3d85 100644 --- a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py +++ b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole") pd_ = tf.constant([[0, 0], [0, 0]], 
name="Hole") op_ = tf.space_to_batch(in_, pd_, 2) diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py index 7601950..892796b 100644 --- a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py +++ b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole") bs_ = tf.constant([2, 2], name="Hole") pd_ = tf.constant([[0, 0], [0, 0]], name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py index e9bc945..e146f6a 100644 --- a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py +++ b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole") op_ = tf.nn.space_to_depth(in_, 2) diff --git a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py index 5fe0bc4..0ce8f0b 100644 --- a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py +++ b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.sparse_placeholder(tf.float32, name="Hole") op_ = tf.compat.v1.sparse_tensor_to_dense(in_) diff --git a/res/TensorFlowPythonExamples/examples/split/__init__.py b/res/TensorFlowPythonExamples/examples/split/__init__.py index 4226f30..11f5427 100644 --- a/res/TensorFlowPythonExamples/examples/split/__init__.py +++ b/res/TensorFlowPythonExamples/examples/split/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf 
+tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole") op_ = tf.compat.v1.split(in_, 2) diff --git a/res/TensorFlowPythonExamples/examples/split_2/__init__.py b/res/TensorFlowPythonExamples/examples/split_2/__init__.py index 03777df..6212c6e 100644 --- a/res/TensorFlowPythonExamples/examples/split_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/split_2/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole") op_ = tf.compat.v1.split(in_, [1, 2, 1]) diff --git a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py index 4aab5da..8e304e8 100755 --- a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py +++ b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.sqrt(in_) diff --git a/res/TensorFlowPythonExamples/examples/square/__init__.py b/res/TensorFlowPythonExamples/examples/square/__init__.py index 2d03e9b..f0c3e44 100644 --- a/res/TensorFlowPythonExamples/examples/square/__init__.py +++ b/res/TensorFlowPythonExamples/examples/square/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.math.square(in_) diff --git a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py index baacf56..6e86f84 100755 --- a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py +++ b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = 
tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.squared_difference(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py index d054f01..ba2348c 100755 --- a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py +++ b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 1, 4), name="Hole") op_ = tf.compat.v1.squeeze(in_) diff --git a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py index 5715bed..d613458 100755 --- a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 1, 1), name="Hole") op_ = tf.compat.v1.squeeze(in_, (0, 2)) diff --git a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py index 2d7234d..a6fa99a 100644 --- a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py +++ b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole") op_ = tf.compat.v1.strided_slice(in_, [1, 0, 0], [2, 1, 3], [1, 1, 1]) diff --git a/res/TensorFlowPythonExamples/examples/subtract/__init__.py b/res/TensorFlowPythonExamples/examples/subtract/__init__.py index feb11b1..39cdbc3 100755 --- a/res/TensorFlowPythonExamples/examples/subtract/__init__.py +++ 
b/res/TensorFlowPythonExamples/examples/subtract/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.compat.v1.subtract(lhs_, rhs_) diff --git a/res/TensorFlowPythonExamples/examples/sum/__init__.py b/res/TensorFlowPythonExamples/examples/sum/__init__.py index 69297d6..14e408c 100644 --- a/res/TensorFlowPythonExamples/examples/sum/__init__.py +++ b/res/TensorFlowPythonExamples/examples/sum/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 4), name="Hole") op_ = tf.compat.v1.reduce_sum(in_, -1, True) diff --git a/res/TensorFlowPythonExamples/examples/tanh/__init__.py b/res/TensorFlowPythonExamples/examples/tanh/__init__.py index dd202a7..ccd3757 100755 --- a/res/TensorFlowPythonExamples/examples/tanh/__init__.py +++ b/res/TensorFlowPythonExamples/examples/tanh/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole") op_ = tf.compat.v1.tanh(in_) diff --git a/res/TensorFlowPythonExamples/examples/tile/__init__.py b/res/TensorFlowPythonExamples/examples/tile/__init__.py index aad4e73..f5d4ef8 100755 --- a/res/TensorFlowPythonExamples/examples/tile/__init__.py +++ b/res/TensorFlowPythonExamples/examples/tile/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole") multiples_ = tf.compat.v1.constant([1, 2], name="Hole") op_ = tf.compat.v1.tile(in_, multiples_) diff --git a/res/TensorFlowPythonExamples/examples/top_k/__init__.py b/res/TensorFlowPythonExamples/examples/top_k/__init__.py index e7b8234..05c3306 100644 --- 
a/res/TensorFlowPythonExamples/examples/top_k/__init__.py +++ b/res/TensorFlowPythonExamples/examples/top_k/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4], name="Hole") op_ = tf.compat.v1.math.top_k(in_, k=1) diff --git a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py index eaeb32a..3dde2b9 100644 --- a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py +++ b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole") op_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)(in_) diff --git a/res/TensorFlowPythonExamples/examples/unique/__init__.py b/res/TensorFlowPythonExamples/examples/unique/__init__.py index ad65757..00e4f3c 100644 --- a/res/TensorFlowPythonExamples/examples/unique/__init__.py +++ b/res/TensorFlowPythonExamples/examples/unique/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(6), name="Hole") op_ = tf.compat.v1.unique(in_) diff --git a/res/TensorFlowPythonExamples/examples/unstack/__init__.py b/res/TensorFlowPythonExamples/examples/unstack/__init__.py index e4ffa21..2a17856 100644 --- a/res/TensorFlowPythonExamples/examples/unstack/__init__.py +++ b/res/TensorFlowPythonExamples/examples/unstack/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4, 2, 3, 4], name="Hole") unpack_ = tf.compat.v1.unstack(in_, axis=0) diff --git a/res/TensorFlowPythonExamples/examples/where/__init__.py 
b/res/TensorFlowPythonExamples/examples/where/__init__.py index 69c89c8..94b7472 100644 --- a/res/TensorFlowPythonExamples/examples/where/__init__.py +++ b/res/TensorFlowPythonExamples/examples/where/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole") where_ = tf.compat.v1.where(in_) diff --git a/res/TensorFlowPythonExamples/examples/where_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_2/__init__.py index 78c50e0..19ad0f2 100644 --- a/res/TensorFlowPythonExamples/examples/where_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/where_2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole") in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole") in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py index de87af7..b6cc7de 100644 --- a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole") where_v2_ = tf.compat.v1.where_v2(in_) diff --git a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py index 4ce17ca..e3ffe03 100644 --- a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[3], name="Hole") in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 1], 
name="Hole") in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1, 3], name="Hole") diff --git a/res/TensorFlowPythonExamples/examples/while/__init__.py b/res/TensorFlowPythonExamples/examples/while/__init__.py index fadaa73..15ff4eb 100644 --- a/res/TensorFlowPythonExamples/examples/while/__init__.py +++ b/res/TensorFlowPythonExamples/examples/while/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + i = tf.compat.v1.constant(0, name="Hole") c = lambda i: tf.compat.v1.less(i, 10) diff --git a/res/TensorFlowPythonExamples/examples/while_2/__init__.py b/res/TensorFlowPythonExamples/examples/while_2/__init__.py index af1c745..9e26639 100644 --- a/res/TensorFlowPythonExamples/examples/while_2/__init__.py +++ b/res/TensorFlowPythonExamples/examples/while_2/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i') x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole') diff --git a/res/TensorFlowPythonExamples/examples/while_3/__init__.py b/res/TensorFlowPythonExamples/examples/while_3/__init__.py index 840846e..30ce15a 100644 --- a/res/TensorFlowPythonExamples/examples/while_3/__init__.py +++ b/res/TensorFlowPythonExamples/examples/while_3/__init__.py @@ -1,5 +1,7 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole') i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2') diff --git a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py index 5230bba..16414ce 100755 --- a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py +++ b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, 
shape=(1, 16, 16, 3), name="Hole") op_ = tf.compat.v1.image.yuv_to_rgb(in_) diff --git a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py index 7daf85e..d4080ec 100644 --- a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py +++ b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py @@ -1,4 +1,6 @@ import tensorflow as tf +tf.compat.v1.disable_eager_execution() + in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole") op_ = tf.zeros_like(in_) diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle index 9a94650..bc39a09 100644 --- a/runtime/contrib/android/api/build.gradle +++ b/runtime/contrib/android/api/build.gradle @@ -8,7 +8,7 @@ android { minSdkVersion 26 targetSdkVersion 29 versionCode 1 - versionName "1.20.0" + versionName "1.21.0" externalNativeBuild { ndkBuild { diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt index 557d403..69d6a92 100644 --- a/runtime/libs/misc/CMakeLists.txt +++ b/runtime/libs/misc/CMakeLists.txt @@ -1,11 +1,22 @@ # Library `nnfw_lib_misc` -file(GLOB_RECURSE NNFW_UTILITY_SRCS "src/*.cpp") +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS}) +add_library(nnfw_lib_misc STATIC ${SOURCES}) target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON) target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common) target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage) -add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp") -target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +add_executable(nnfw_lib_misc_test ${TESTS}) 
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_lib_misc) +target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage) +target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD}) + +add_test(nnfw_lib_misc_test nnfw_lib_misc_test) +install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone) diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/examples/tensor_index_iterator.cpp deleted file mode 100644 index 590b433..0000000 --- a/runtime/libs/misc/examples/tensor_index_iterator.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "misc/tensor/IndexIterator.h" - -#include - -#include -#include - -#include - -void test_iterate(void) -{ - const nnfw::misc::tensor::Shape shape{3, 4, 7}; - - std::array array; - - array.fill(0); - - using nnfw::misc::tensor::Index; - using nnfw::misc::tensor::iterate; - - iterate(shape) << [&](const Index &index) { - assert(index.rank() == shape.rank()); - - const uint32_t rank = index.rank(); - - uint32_t offset = index.at(0); - - for (uint32_t axis = 1; axis < rank; ++axis) - { - offset *= shape.dim(axis); - offset += index.at(axis); - } - - array[offset] += 1; - }; - - assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; })); -} - -int main(int argc, char **argv) -{ - test_iterate(); - - nnfw::misc::tensor::Shape shape{3, 4, 3, 4}; - - std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl; - - nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) { - std::cout << "rank: " << index.rank() << std::endl; - - for (uint32_t d = 0; d < index.rank(); ++d) - { - std::cout << " offset(" << d << ") = " << index.at(d) << std::endl; - } - }; - - return 0; -} diff --git a/runtime/libs/misc/include/misc/EnvConfigSource.h b/runtime/libs/misc/include/misc/EnvConfigSource.h new file mode 100644 index 0000000..63c8ae9 --- /dev/null +++ b/runtime/libs/misc/include/misc/EnvConfigSource.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_MISC_ENV_CONFIG_SOURCE_H__ +#define __NNFW_MISC_ENV_CONFIG_SOURCE_H__ + +#include "GeneralConfigSource.h" + +#include + +namespace nnfw +{ +namespace misc +{ + +class EnvConfigSource final : public GeneralConfigSource +{ +public: + std::string get(const std::string &key) const override; + +private: + std::unordered_map _default_attributes; +}; + +} // namespace misc +} // namespace nnfw + +#endif // __NNFW_MISC_ENV_CONFIG_SOURCE_H__ diff --git a/runtime/libs/misc/include/misc/GeneralConfigSource.h b/runtime/libs/misc/include/misc/GeneralConfigSource.h new file mode 100644 index 0000000..a3de66e --- /dev/null +++ b/runtime/libs/misc/include/misc/GeneralConfigSource.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__ +#define __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__ + +#include "IConfigSource.h" + +#include + +namespace nnfw +{ +namespace misc +{ + +class GeneralConfigSource : public IConfigSource +{ +public: + GeneralConfigSource() = default; + + std::string get(const std::string &key) const override; + void set(const std::string &key, const std::string &val); + +private: + std::unordered_map _map; +}; + +} // namespace misc +} // namespace nnfw + +#endif // __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__ diff --git a/runtime/libs/misc/include/misc/IConfigSource.h b/runtime/libs/misc/include/misc/IConfigSource.h new file mode 100644 index 0000000..fe2c48e --- /dev/null +++ b/runtime/libs/misc/include/misc/IConfigSource.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_MISC_I_CONFIG_SOURCE_H__ +#define __NNFW_MISC_I_CONFIG_SOURCE_H__ + +#include + +namespace nnfw +{ +namespace misc +{ + +struct IConfigSource +{ + /** + * @brief Destroy the IConfigSource object + */ + virtual ~IConfigSource() = default; + + /** + * @brief get the value for the matching key + * + * @param key string key to search + * @return string value associated with the key + */ + virtual std::string get(const std::string &key) const = 0; +}; + +} // namespace misc +} // namespace nnfw + +#endif // __NNFW_MISC_I_CONFIG_SOURCE_H__ diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h index 46fecca..c9d7203 100644 --- a/runtime/libs/misc/include/misc/string_helpers.h +++ b/runtime/libs/misc/include/misc/string_helpers.h @@ -50,7 +50,7 @@ inline std::vector split(const std::string &s, char delim) std::vector elems; while (std::getline(ss, item, delim)) { - elems.push_back(std::move(item)); + elems.push_back(item); } return elems; } diff --git a/runtime/libs/misc/src/EnvConfigSource.cpp b/runtime/libs/misc/src/EnvConfigSource.cpp new file mode 100644 index 0000000..3abc9d1 --- /dev/null +++ b/runtime/libs/misc/src/EnvConfigSource.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "misc/EnvConfigSource.h" + +#include + +namespace nnfw +{ +namespace misc +{ + +std::string EnvConfigSource::get(const std::string &key) const +{ + const char *value = std::getenv(key.c_str()); + if (value != nullptr) + { + return value; + } + else + { + return GeneralConfigSource::get(key); + } +} + +} // namespace misc +} // namespace nnfw diff --git a/runtime/libs/misc/src/GeneralConfigSource.cpp b/runtime/libs/misc/src/GeneralConfigSource.cpp new file mode 100644 index 0000000..298c166 --- /dev/null +++ b/runtime/libs/misc/src/GeneralConfigSource.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "misc/GeneralConfigSource.h" + +namespace nnfw +{ +namespace misc +{ + +std::string GeneralConfigSource::get(const std::string &key) const +{ + auto itr = _map.find(key); + if (itr == _map.end()) + { + return ""; + } + else + { + return itr->second; + } +} + +void GeneralConfigSource::set(const std::string &key, const std::string &val) { _map[key] = val; } + +} // namespace misc +} // namespace nnfw diff --git a/runtime/libs/misc/src/string_helpers.test.cpp b/runtime/libs/misc/src/string_helpers.test.cpp new file mode 100644 index 0000000..1111425 --- /dev/null +++ b/runtime/libs/misc/src/string_helpers.test.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "misc/string_helpers.h" + +#include <gtest/gtest.h> + +TEST(StringHelpersTest, split) +{ + const std::string example = "abc;def;ghi"; + + auto str_vector = nnfw::misc::split(example, ';'); + + ASSERT_EQ(str_vector.size(), 3); + EXPECT_STREQ(str_vector[0].c_str(), "abc"); + EXPECT_STREQ(str_vector[1].c_str(), "def"); + EXPECT_STREQ(str_vector[2].c_str(), "ghi"); +} + +TEST(StringHelpersTest, neg_split_empty) +{ + const std::string example = ""; + + auto str_vector = nnfw::misc::split(example, ';'); + + ASSERT_EQ(str_vector.size(), 0); +} + +TEST(StringHelpersTest, neg_nonsplit) +{ + const std::string example = "abc;def;ghi"; + + auto str_vector = nnfw::misc::split(example, ':'); + + ASSERT_EQ(str_vector.size(), 1); + EXPECT_STREQ(str_vector[0].c_str(), example.c_str()); +} + +TEST(StringHelpersTest, append) +{ + auto append_str = nnfw::misc::str("abc", "-", 1); + + EXPECT_STREQ(append_str.c_str(), "abc-1"); +} + +TEST(StringHelpersTest, neg_append_nullstr) +{ + const char *null_str = nullptr; + auto append_str = nnfw::misc::str(null_str, null_str); + + ASSERT_EQ(append_str.size(), 0); +} + +TEST(StringHelpersTest, join) +{ + const std::vector<std::string> example = {"abc", "def", "ghi"}; + + auto join_str = nnfw::misc::join(example.begin(), example.end(), ";"); + EXPECT_STREQ(join_str.c_str(), "abc;def;ghi"); +} + +TEST(StringHelpersTest, neg_join_empty) +{ + const std::vector<std::string> example = {}; + + auto
join_str = nnfw::misc::join(example.begin(), example.end(), ";"); + ASSERT_EQ(join_str.size(), 0); +} diff --git a/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp new file mode 100644 index 0000000..4cff606 --- /dev/null +++ b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "misc/tensor/IndexEnumerator.h" + +#include <vector> +#include <algorithm> + +#include <gtest/gtest.h> + +using nnfw::misc::tensor::Shape; +using nnfw::misc::tensor::Index; +using nnfw::misc::tensor::IndexEnumerator; + +TEST(MiscIndexEnumeratorTest, iterate_full_range) +{ + const uint32_t H = 3; + const uint32_t W = 4; + + const Shape shape{H, W}; + + std::vector<uint32_t> count; + + count.resize(H * W, 0); + + for (IndexEnumerator e{shape}; e.valid(); e.advance()) + { + const auto &ind = e.curr(); + + ASSERT_EQ(2, ind.rank()); + count.at(ind.at(0) * W + ind.at(1)) += 1; + } + + ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; })); +} + +TEST(MiscIndexEnumeratorTest, neg_zero_rank_shape) +{ + // Test abnormal case of empty shape + // It is expected not to throw any exception, do nothing + const Shape shape{}; + IndexEnumerator e{shape}; + ASSERT_NO_THROW(e.valid()); + ASSERT_NO_THROW(e.advance()); + SUCCEED(); +} diff --git a/runtime/libs/misc/src/tensor/IndexIterator.test.cpp b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp new file mode 100644 index 0000000..875786b --- /dev/null +++ b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "misc/tensor/IndexIterator.h" + +#include <gtest/gtest.h> + +#include <algorithm> +#include <array> + +using namespace nnfw::misc::tensor; + +TEST(MiscIndexIteratorTest, iterate) +{ + const Shape shape{3, 4, 7}; + + std::array<int, 3 * 4 * 7> array; + + array.fill(0); + + iterate(shape) << [&](const Index &index) { + assert(index.rank() == shape.rank()); + + const uint32_t rank = index.rank(); + + uint32_t offset = index.at(0); + + for (uint32_t axis = 1; axis < rank; ++axis) + { + offset *= shape.dim(axis); + offset += index.at(axis); + } + + array[offset] += 1; + }; + + ASSERT_TRUE(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; })); +} + +TEST(MiscIndexIteratorTest, neg_zero_rank_shape) +{ + // Test abnormal case of empty shape + // It is expected not to throw any exception, do nothing + const Shape shape{}; + + ASSERT_NO_THROW(iterate(shape) << ([](const Index &index) {})); + SUCCEED(); +} diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt index f88f131..cf8c520 100644 --- a/runtime/libs/ndarray/CMakeLists.txt +++ b/runtime/libs/ndarray/CMakeLists.txt @@ -3,8 +3,6 @@ add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp) set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(ndarray PUBLIC include) -#can't make this private because of c++ templates -target_include_directories(ndarray PUBLIC src) option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types") @@ -19,5 +17,12 @@ if(NOT ENABLE_TEST) return() endif(NOT ENABLE_TEST) -add_subdirectory(test) +add_executable(ndarray_test src/Array.test.cpp src/ContiguousSpan.test.cpp) +target_link_libraries(ndarray_test PRIVATE ndarray) +target_link_libraries(ndarray_test PRIVATE nnfw_coverage) +target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD}) + +add_test(ndarray_test ndarray_test) +install(TARGETS ndarray_test DESTINATION unittest_standalone) + add_subdirectory(example) diff --git
a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h index 09e7917..568fe1c 100644 --- a/runtime/libs/ndarray/include/ndarray/Array.h +++ b/runtime/libs/ndarray/include/ndarray/Array.h @@ -22,37 +22,21 @@ #include "ContiguousSpan.h" #include "Shape.h" -#if __cplusplus < 201402L -#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions -#else -#include -#endif - #include -#include -#include #include -#include +#include #include +#include +#include +#include namespace ndarray { -// there is no index_sequence before c++14 -#if __cplusplus < 201402L - -template using index_sequence = cxx14::index_sequence; - -template using make_index_sequence = cxx14::make_index_sequence; - -#else - template using index_sequence = std::index_sequence; template using make_index_sequence = std::make_index_sequence<_Num>; -#endif //__cplusplus < 201402L - struct Strides { explicit Strides(Shape s) : _strides{} { fillStrides(s); } diff --git a/runtime/libs/ndarray/src/Array.test.cpp b/runtime/libs/ndarray/src/Array.test.cpp new file mode 100644 index 0000000..15e6760 --- /dev/null +++ b/runtime/libs/ndarray/src/Array.test.cpp @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ndarray/Array.h" + +#include + +using namespace ndarray; + +TEST(NDArrayArrayTests, basic_data_test) +{ + float raw_data[] = {1, 2, 3, 4}; + int32_t raw_data_int[] = {1, 2, 3, 4}; + uint32_t raw_data_uint[] = {1, 2, 3, 4}; + int8_t raw_data_int8[] = {1, 2, 3, 4}; + + Array data22{raw_data, {2, 2}}; + Array data22_int{raw_data_int, {2, 2}}; + Array data22_uint{raw_data_uint, {2, 2}}; + Array data22_int8{raw_data_int8, {2, 2}}; + + ASSERT_FLOAT_EQ(data22.at(0, 0), 1); + ASSERT_FLOAT_EQ(data22.at(0, 1), 2); + ASSERT_FLOAT_EQ(data22.at(1, 0), 3); + ASSERT_FLOAT_EQ(data22.at(1, 1), 4); + ASSERT_EQ(data22.shape().rank(), 2); + ASSERT_EQ(data22.shape().dim(0), 2); + ASSERT_EQ(data22.shape().dim(1), 2); + + Array data14{raw_data, {1, 4}}; + ASSERT_FLOAT_EQ(data14.at(0, 0), 1); + ASSERT_FLOAT_EQ(data14.at(0, 1), 2); + ASSERT_FLOAT_EQ(data14.at(0, 2), 3); + ASSERT_FLOAT_EQ(data14.at(0, 3), 4); + ASSERT_EQ(data14.shape().rank(), 2); + ASSERT_EQ(data14.shape().dim(0), 1); + ASSERT_EQ(data14.shape().dim(1), 4); + + // + { + ContiguousSpan cs = data22.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_FLOAT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_FLOAT_EQ(cs2.at(3), 4); + + float sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_FLOAT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_FLOAT_EQ(cs4->at(3), 4); + } + + // + { + ContiguousSpan cs = data22.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_FLOAT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_FLOAT_EQ(cs2.at(3), 4); + + float sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_FLOAT_EQ(sum, 10); + + std::vector 
array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_FLOAT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_FLOAT_EQ(cs4->at(3), 4); + } + + // + { + ContiguousSpan cs = data22_int.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_EQ(cs2.at(3), 4); + + int32_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_EQ(cs4->at(3), 4); + } + + // + { + ContiguousSpan cs = data22_int.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_EQ(cs2.at(3), 4); + + int32_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = + std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_EQ(cs4->at(3), 4); + } + + // + { + ContiguousSpan cs = data22_uint.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_EQ(cs2.at(3), 4); + + uint32_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = 
std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + } + + // + { + ContiguousSpan cs = data22_uint.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_EQ(cs2.at(3), 4); + + uint32_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = + std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_EQ(cs4->at(3), 4); + } + + // + { + ContiguousSpan cs = data22_int8.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_FLOAT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_FLOAT_EQ(cs2.at(3), 4); + + int8_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + ASSERT_EQ(cs4->size(), 4); + ASSERT_EQ(cs4->at(3), 4); + + auto cs5 = ContiguousSpan(array_data.begin(), array_data.end()); + ASSERT_EQ(cs5.size(), 4); + ASSERT_EQ(cs5.at(3), 4); + } + + // + { + ContiguousSpan cs = data22_int8.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_FLOAT_EQ(cs.at(3), 4); + + ContiguousSpan cs2 = std::move(cs); + ASSERT_EQ(cs2.size(), 4); + ASSERT_FLOAT_EQ(cs2.at(3), 4); + + int8_t sum = 0; + for (auto it = cs2.begin(); it < cs2.end(); it++) + { + sum += *it; + } + ASSERT_EQ(sum, 10); + + std::vector array_data{1, 2, 3, 4}; + auto cs3 = std::make_unique>(array_data.begin(), array_data.end()); + ASSERT_EQ(cs3->size(), 4); + ASSERT_EQ(cs3->at(3), 4); + + auto cs4 = std::move(cs3); + ASSERT_EQ(cs3, nullptr); + 
ASSERT_EQ(cs4->size(), 4); + ASSERT_EQ(cs4->at(3), 4); + + auto cs5 = ContiguousSpan(array_data.begin(), array_data.end()); + ASSERT_EQ(cs5.size(), 4); + ASSERT_EQ(cs5.at(3), 4); + } + + Array lv = std::move(data14); + ASSERT_FLOAT_EQ(lv.at(0, 0), 1); + ASSERT_FLOAT_EQ(lv.at(0, 1), 2); + ASSERT_FLOAT_EQ(lv.at(0, 2), 3); + ASSERT_FLOAT_EQ(lv.at(0, 3), 4); +} + +TEST(NDArrayArrayTests, slice_write_test) +{ + // float + { + float raw_data[4] = {0}; + + Array data22{raw_data, {2, 2}}; + + data22.slice(1) = {1, 2}; + + ASSERT_FLOAT_EQ(data22.at(0, 0), 0); + ASSERT_FLOAT_EQ(data22.at(0, 1), 0); + ASSERT_FLOAT_EQ(data22.at(1, 0), 1); + ASSERT_FLOAT_EQ(data22.at(1, 1), 2); + } + + // int32_t + { + int32_t raw_data[4] = {0}; + Array data22{raw_data, {2, 2}}; + + data22.slice(1) = {1, 2}; + + ASSERT_EQ(data22.at(0, 0), 0); + ASSERT_EQ(data22.at(0, 1), 0); + ASSERT_EQ(data22.at(1, 0), 1); + ASSERT_EQ(data22.at(1, 1), 2); + } + + // uint32_t + { + uint32_t raw_data[4] = {0}; + Array data22{raw_data, {2, 2}}; + + data22.slice(1) = {1, 2}; + + ASSERT_EQ(data22.at(0, 0), 0); + ASSERT_EQ(data22.at(0, 1), 0); + ASSERT_EQ(data22.at(1, 0), 1); + ASSERT_EQ(data22.at(1, 1), 2); + } + + // int8_t + { + int8_t raw_data[4] = {0}; + Array data22{raw_data, {2, 2}}; + + data22.slice(1) = {1, 2}; + + ASSERT_EQ(data22.at(0, 0), 0); + ASSERT_EQ(data22.at(0, 1), 0); + ASSERT_EQ(data22.at(1, 0), 1); + ASSERT_EQ(data22.at(1, 1), 2); + } +} + +TEST(NDArrayArrayTests, slice_read_test) +{ + // float + { + float raw_data[4] = {1, 2, 3, 4}; + + Array data22{raw_data, {2, 2}}; + + auto slice = data22.slice(1); + + ASSERT_FLOAT_EQ(slice[0], 3); + ASSERT_FLOAT_EQ(slice[1], 4); + } + + // int32_t + { + int32_t raw_data[4] = {1, 2, 3, 4}; + + Array data22{raw_data, {2, 2}}; + + auto slice = data22.slice(1); + + ASSERT_EQ(slice[0], 3); + ASSERT_EQ(slice[1], 4); + } + + // uint32_t + { + uint32_t raw_data[4] = {1, 2, 3, 4}; + + Array data22{raw_data, {2, 2}}; + + auto slice = data22.slice(1); + + 
ASSERT_EQ(slice[0], 3); + ASSERT_EQ(slice[1], 4); + } + + // int8_t + { + int8_t raw_data[4] = {1, 2, 3, 4}; + + Array data22{raw_data, {2, 2}}; + + auto slice = data22.slice(1); + + ASSERT_EQ(slice[0], 3); + ASSERT_EQ(slice[1], 4); + } +} + +TEST(NDArrayArrayTests, multidim_test) +{ + // float + { + float raw_data[5] = {0, 1, 2, 3, 4}; + + Array data22{raw_data, {1, 1, 1, 1, 5}}; + + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4); + } + + // int32_t + { + int32_t raw_data[5] = {0, 1, 2, 3, 4}; + + Array data22{raw_data, {1, 1, 1, 1, 5}}; + + ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0); + ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1); + ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2); + ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3); + ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4); + } + + // uint32_t + { + uint32_t raw_data[5] = {0, 1, 2, 3, 4}; + + Array data22{raw_data, {1, 1, 1, 1, 5}}; + + ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0); + ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1); + ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2); + ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3); + ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4); + } + + // int8_t + { + int8_t raw_data[5] = {0, 1, 2, 3, 4}; + + Array data22{raw_data, {1, 1, 1, 1, 5}}; + + ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0); + ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1); + ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2); + ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3); + ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4); + } +} diff --git a/runtime/libs/ndarray/src/ContiguousSpan.test.cpp b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp new file mode 100644 index 0000000..dd11086 --- /dev/null +++ b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ndarray/ContiguousSpan.h" + +#include <gtest/gtest.h> + +using namespace ndarray; + +TEST(NDArrayContiguousSpanTests, slice_assign_test) +{ + // float + { + std::vector<float> v1{1, 2, 3, 4, 5}; + std::vector<float> v2(5); + + ContiguousSpan<float> span1(v1.begin(), v1.end()); + ContiguousSpan<float> span2(v2.begin(), v2.end()); + + span2.assign(span1); + + ASSERT_EQ(v1, v2); + ASSERT_EQ(span1.size(), 5); + ASSERT_EQ(span2.size(), 5); + + ASSERT_EQ(span2.at(2), 3); + ASSERT_EQ(span2.at(4), 5); + + ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2)); + + ContiguousSpan<float> span3(span2.offset(1)); + ASSERT_EQ(span3.size(), 4); + ASSERT_EQ(span3.at(0), 2); + ASSERT_EQ(span3.at(1), 3); + ASSERT_EQ(span3[2], 4); + ASSERT_EQ(span3[3], 5); + + // const + ContiguousSpan<float, true> span4(v1.begin(), v1.end()); + ASSERT_EQ(span4.size(), 5); + ASSERT_EQ(span4.at(0), 1); + ASSERT_EQ(span4.at(1), 2); + ASSERT_EQ(span4.at(2), 3); + ASSERT_EQ(span4[3], 4); + ASSERT_EQ(span4[4], 5); + + ContiguousSpan<float, true> span5(span4.offset(1)); + ASSERT_EQ(span5.size(), 4); + ASSERT_EQ(span5.at(0), 2); + ASSERT_EQ(span5.at(1), 3); + ASSERT_EQ(span5[2], 4); + ASSERT_EQ(span5[3], 5); + + ASSERT_EQ(*(span5.data() + 2), *(span4.data() + 3)); + } + + // int32_t + { + std::vector<int32_t> v1{1, 2, 3, 4, 5}; + std::vector<int32_t> v2(5); + + ContiguousSpan<int32_t> span1(v1.begin(), v1.end()); + ContiguousSpan<int32_t> span2(v2.begin(), v2.end()); + + span2.assign(span1); + + ASSERT_EQ(v1, v2); +
ASSERT_EQ(span1.size(), 5); + ASSERT_EQ(span2.size(), 5); + + ASSERT_EQ(span2.at(2), 3); + ASSERT_EQ(span2.at(4), 5); + + ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2)); + + ContiguousSpan<int32_t> span3(span2.offset(1)); + ASSERT_EQ(span3.size(), 4); + ASSERT_EQ(span3.at(0), 2); + ASSERT_EQ(span3.at(1), 3); + ASSERT_EQ(span3[2], 4); + ASSERT_EQ(span3[3], 5); + + // const + ContiguousSpan<int32_t, true> span4(v1.begin(), v1.end()); + ASSERT_EQ(span4.size(), 5); + ASSERT_EQ(span4.at(0), 1); + ASSERT_EQ(span4.at(1), 2); + ASSERT_EQ(span4.at(2), 3); + ASSERT_EQ(span4[3], 4); + ASSERT_EQ(span4[4], 5); + + ContiguousSpan<int32_t, true> span5(span4.offset(1)); + ASSERT_EQ(span5.size(), 4); + ASSERT_EQ(span5.at(0), 2); + ASSERT_EQ(span5.at(1), 3); + ASSERT_EQ(span5[2], 4); + ASSERT_EQ(span5[3], 5); + } + + // uint32_t + { + std::vector<uint32_t> v1{1, 2, 3, 4, 5}; + std::vector<uint32_t> v2(5); + + ContiguousSpan<uint32_t> span1(v1.begin(), v1.end()); + ContiguousSpan<uint32_t> span2(v2.begin(), v2.end()); + + span2.assign(span1); + + ASSERT_EQ(v1, v2); + ASSERT_EQ(span1.size(), 5); + ASSERT_EQ(span2.size(), 5); + + ASSERT_EQ(span2.at(2), 3); + ASSERT_EQ(span2.at(4), 5); + + ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2)); + + ContiguousSpan<uint32_t> span3(span2.offset(1)); + ASSERT_EQ(span3.size(), 4); + ASSERT_EQ(span3.at(0), 2); + ASSERT_EQ(span3.at(1), 3); + ASSERT_EQ(span3[2], 4); + ASSERT_EQ(span3[3], 5); + + // const + ContiguousSpan<uint32_t, true> span4(v1.begin(), v1.end()); + ASSERT_EQ(span4.size(), 5); + ASSERT_EQ(span4.at(0), 1); + ASSERT_EQ(span4.at(1), 2); + ASSERT_EQ(span4.at(2), 3); + ASSERT_EQ(span4[3], 4); + ASSERT_EQ(span4[4], 5); + + ContiguousSpan<uint32_t, true> span5(span4.offset(1)); + ASSERT_EQ(span5.size(), 4); + ASSERT_EQ(span5.at(0), 2); + ASSERT_EQ(span5.at(1), 3); + ASSERT_EQ(span5[2], 4); + ASSERT_EQ(span5[3], 5); + } + + // int8_t + { + std::vector<int8_t> v1{1, 2, 3, 4, 5}; + std::vector<int8_t> v2(5); + + ContiguousSpan<int8_t> span1(v1.begin(), v1.end()); + ContiguousSpan<int8_t> span2(v2.begin(), v2.end()); + + span2.assign(span1); + + ASSERT_EQ(v1, v2); +
ASSERT_EQ(span1.size(), 5); + ASSERT_EQ(span2.size(), 5); + + ASSERT_EQ(span2.at(2), 3); + ASSERT_EQ(span2.at(4), 5); + + ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2)); + + ContiguousSpan<int8_t> span3(span2.offset(1)); + ASSERT_EQ(span3.size(), 4); + ASSERT_EQ(span3.at(0), 2); + ASSERT_EQ(span3.at(1), 3); + ASSERT_EQ(span3[2], 4); + ASSERT_EQ(span3[3], 5); + + // const + ContiguousSpan<int8_t, true> span4(v1.begin(), v1.end()); + ASSERT_EQ(span4.size(), 5); + ASSERT_EQ(span4.at(0), 1); + ASSERT_EQ(span4.at(1), 2); + ASSERT_EQ(span4.at(2), 3); + ASSERT_EQ(span4[3], 4); + ASSERT_EQ(span4[4], 5); + + ContiguousSpan<int8_t, true> span5(span4.offset(1)); + ASSERT_EQ(span5.size(), 4); + ASSERT_EQ(span5.at(0), 2); + ASSERT_EQ(span5.at(1), 3); + ASSERT_EQ(span5[2], 4); + ASSERT_EQ(span5[3], 5); + } +} diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h deleted file mode 100644 index 8b78fb9..0000000 --- a/runtime/libs/ndarray/src/detail/cxx14.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef _NDARRAY_CXX14_H_ -#define _NDARRAY_CXX14_H_ - -namespace ndarray -{ - -namespace cxx14 -{ - -template struct index_sequence -{ - using value_type = size_t; - - static constexpr std::size_t size() noexcept { return sizeof...(Nums); } -}; - -namespace detail -{ - -template struct _append; - -template struct _append> -{ - using result = index_sequence; -}; - -template struct make_index_sequence -{ - using result = - typename detail::_append::result>::result; -}; - -template <> struct make_index_sequence<1> -{ - using result = index_sequence<0>; -}; - -template <> struct make_index_sequence<0> -{ - using result = index_sequence<>; -}; - -} // namespace detail - -template using make_index_sequence = typename detail::make_index_sequence::result; - -} // namespace cxx14 - -} // namespace ndarray - -#endif //_NDARRAY_CXX14_H_ diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt deleted file mode 100644 index be1ed65..0000000 --- a/runtime/libs/ndarray/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -if(NOT TARGET ndarray) - return() -endif() - -add_executable(ndarray_test ndarray_test.cpp) - -target_link_libraries(ndarray_test PRIVATE ndarray) - -nnfw_find_package(GTest) -if(NOT GTest_FOUND) - message(STATUS "GTest not avaialble. Skipping NDArray test build") - return() -endif(NOT GTest_FOUND) - -target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD}) - -add_test(ndarray_test ndarray_test) -install(TARGETS ndarray_test DESTINATION unittest_standalone) diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp deleted file mode 100644 index 4b5ad57..0000000 --- a/runtime/libs/ndarray/test/ndarray_test.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "ndarray/Array.h" - -using namespace ndarray; - -TEST(NDArray_tests, basic_data_test) -{ - - float raw_data[] = {1, 2, 3, 4}; - - Array data22{raw_data, {2, 2}}; - - ASSERT_FLOAT_EQ(data22.at(0, 0), 1); - ASSERT_FLOAT_EQ(data22.at(0, 1), 2); - ASSERT_FLOAT_EQ(data22.at(1, 0), 3); - ASSERT_FLOAT_EQ(data22.at(1, 1), 4); - ASSERT_EQ(data22.shape().rank(), 2); - ASSERT_EQ(data22.shape().dim(0), 2); - ASSERT_EQ(data22.shape().dim(1), 2); - - Array data14{raw_data, {1, 4}}; - ASSERT_FLOAT_EQ(data14.at(0, 0), 1); - ASSERT_FLOAT_EQ(data14.at(0, 1), 2); - ASSERT_FLOAT_EQ(data14.at(0, 2), 3); - ASSERT_FLOAT_EQ(data14.at(0, 3), 4); - ASSERT_EQ(data14.shape().rank(), 2); - ASSERT_EQ(data14.shape().dim(0), 1); - ASSERT_EQ(data14.shape().dim(1), 4); - - ContiguousSpan cs = data22.flat(); - ASSERT_EQ(cs.size(), 4); - ASSERT_FLOAT_EQ(cs.at(3), 4); - - Array lv = std::move(data14); - ASSERT_FLOAT_EQ(lv.at(0, 0), 1); - ASSERT_FLOAT_EQ(lv.at(0, 1), 2); - ASSERT_FLOAT_EQ(lv.at(0, 2), 3); - ASSERT_FLOAT_EQ(lv.at(0, 3), 4); -} - -TEST(NDArray_tests, slice_write_test) -{ - float raw_data[4] = {0}; - - Array data22{raw_data, {2, 2}}; - - data22.slice(1) = {1, 2}; - - ASSERT_FLOAT_EQ(data22.at(0, 0), 0); - ASSERT_FLOAT_EQ(data22.at(0, 1), 0); - ASSERT_FLOAT_EQ(data22.at(1, 0), 1); - ASSERT_FLOAT_EQ(data22.at(1, 1), 2); -} - -TEST(NDArray_tests, slice_read_test) -{ - float raw_data[4] = {1, 2, 3, 4}; - - Array data22{raw_data, {2, 2}}; - - auto slice = data22.slice(1); - - ASSERT_FLOAT_EQ(slice[0], 3); - ASSERT_FLOAT_EQ(slice[1], 
4); -} - -TEST(NDArray_tests, multidim_test) -{ - float raw_data[5] = {0, 1, 2, 3, 4}; - - Array data22{raw_data, {1, 1, 1, 1, 5}}; - - ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0); - ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1); - ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2); - ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3); - ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4); -} - -TEST(NDArray_tests, slice_assign_test) -{ - std::vector v1{1, 2, 3, 4, 5}; - std::vector v2(5); - - ContiguousSpan span1(v1.begin(), v1.end()); - ContiguousSpan span2(v2.begin(), v2.end()); - - span2.assign(span1); - - ASSERT_EQ(v1, v2); - ASSERT_EQ(span1.size(), 5); - ASSERT_EQ(span2.size(), 5); - - ASSERT_EQ(span2.at(2), 3); - ASSERT_EQ(span2.at(4), 5); - - ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2)); - - ContiguousSpan span3(span2.offset(1)); - ASSERT_EQ(span3.size(), 4); - ASSERT_EQ(span3.at(0), 2); - ASSERT_EQ(span3.at(1), 3); - ASSERT_EQ(span3.at(2), 4); - ASSERT_EQ(span3.at(3), 5); -} diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt index 88d52a5..3c9ca99 100644 --- a/runtime/onert/CMakeLists.txt +++ b/runtime/onert/CMakeLists.txt @@ -7,9 +7,3 @@ add_subdirectory(frontend) add_subdirectory(core) add_subdirectory(api) add_subdirectory(sample) - -if(NOT ENABLE_TEST) - return() -endif(NOT ENABLE_TEST) - -add_subdirectory(test) diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt index beb243a..badd5d1 100644 --- a/runtime/onert/api/CMakeLists.txt +++ b/runtime/onert/api/CMakeLists.txt @@ -10,6 +10,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h) target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header) target_link_libraries(${ONERT_DEV} PRIVATE onert_core) +target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc) target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD}) target_link_libraries(${ONERT_DEV} PRIVATE trix_loader) target_link_libraries(${ONERT_DEV} PRIVATE 
nnfw_common) diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index 6f296a9..658cba4 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -193,7 +193,7 @@ typedef struct nnfw_tensorinfo * And inference is performed after {@link nnfw_run} is invoked. * *

{@link nnfw_close_session} should be called once - * if session is no longer need + * if session is no longer needed * * @param[out] session The session to be created * @return NNFW_STATUS_NO_ERROR if successful @@ -213,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session); /** * @brief Load model from nnpackage file or directory * - * The length of \p package_file_path must not execeed 1024 bytes including zero at the end. + * The length of \p package_file_path must not exceed 1024 bytes including zero at the end. * * @param[in] session nnfw_session loading the given nnpackage file/dir * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 45b3471..2fbb96f 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01001400 +#define NNFW_VERSION 0x01001500 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index 0ebd385..a0e6ee0 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -58,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0); * @param session the session to be created * @return NNFW_STATUS_NO_ERROR if successful */ -NNFW_STATUS nnfw_create_session(nnfw_session **session) -{ - NNFW_RETURN_ERROR_IF_NULL(session); - - *session = new (std::nothrow) nnfw_session(); - if (*session == nullptr) - return NNFW_STATUS_OUT_OF_MEMORY; - return NNFW_STATUS_NO_ERROR; -} +NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); } /* * Close a session instance diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index 
62a0439..9b43dd3 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -25,6 +25,7 @@ #include "tflite_loader.h" #include "trix_loader.h" #include "json/json.h" +#include "ir/NNPkg.h" #include "ir/OpCode.h" #include "util/TracingCtx.h" @@ -110,9 +111,7 @@ std::string trim(const std::string &value) return value.substr(begin, range); } -using CfgKeyValues = std::unordered_map; - -bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues) +bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues) { std::ifstream ifs(cfgfile); if (ifs.is_open()) @@ -143,19 +142,6 @@ bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues) return false; } -void setConfigKeyValues(const CfgKeyValues &keyValues) -{ - auto configsrc = std::make_unique(); - - for (auto it = keyValues.begin(); it != keyValues.end(); ++it) - { - VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; - configsrc->set(it->first, it->second); - } - - onert::util::config_source_ext(std::move(configsrc)); -} - NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt) { using onert::ir::DataType; @@ -195,15 +181,59 @@ void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape, ti->dtype = datatype_to_nnfw_dtype(dtype); } +std::unique_ptr loadModel(const std::string filename, + const std::string model_type) +{ + if (model_type == "tflite") + return onert::tflite_loader::loadModel(filename.c_str()); + if (model_type == "circle") + return onert::circle_loader::loadModel(filename.c_str()); + if (model_type == "tvn") + return onert::trix_loader::loadModel(filename.c_str()); + + std::cerr << "Unsupported model type" << std::endl; + return std::unique_ptr(nullptr); +} + } // namespace nnfw_session::nnfw_session() - : _subgraphs{nullptr}, _compiler{nullptr}, _execution{nullptr}, - _kernel_registry{std::make_shared()}, _tracing_ctx{nullptr} + : _nnpkg{nullptr}, _coptions{}, 
_compiler_artifact{nullptr}, _execution{nullptr}, + _kernel_registry{nullptr} { // DO NOTHING } +NNFW_STATUS nnfw_session::create(nnfw_session **session) +{ + if (session == nullptr) + return NNFW_STATUS_UNEXPECTED_NULL; + + // Create session + *session = new (std::nothrow) nnfw_session(); + if (*session == nullptr) + { + std::cerr << "Error during session creation" << std::endl; + return NNFW_STATUS_OUT_OF_MEMORY; + } + + // Initialize fields + try + { + (*session)->_kernel_registry = std::make_shared(); + } + catch (const std::exception &e) + { + std::cerr << "Error during session initialization : " << e.what() << std::endl; + delete *session; + *session = nullptr; + + return NNFW_STATUS_ERROR; + } + + return NNFW_STATUS_NO_ERROR; +} + nnfw_session::~nnfw_session() = default; NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size) @@ -219,19 +249,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size) try { - _subgraphs = onert::circle_loader::loadModel(buffer, size); + auto model = onert::circle_loader::loadModel(buffer, size); + _nnpkg = std::make_shared(std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); + _state = State::MODEL_LOADED; } catch (const std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique(_subgraphs.get()); - - _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -247,45 +274,28 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path) } std::string filename{model_file_path}; - if (filename.size() < 8) // .tflite or .circle + // TODO: Use std::filesystem::path when we can use c++17. + auto dotidx = filename.find_last_of('.'); + if (dotidx == std::string::npos) { - std::cerr << "Invalid model file path." 
<< std::endl; + std::cerr << "Invalid model file path. Please use file with extension." << std::endl; return NNFW_STATUS_ERROR; } - - std::string model_type = filename.substr(filename.size() - 7, 7); - + std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot try { - if (model_type == ".tflite") - { - _subgraphs = onert::tflite_loader::loadModel(filename.c_str()); - } - else if (model_type == ".circle") - { - _subgraphs = onert::circle_loader::loadModel(filename.c_str()); - } - else if (model_type == ".tvn") - { - _subgraphs = onert::trix_loader::loadModel(filename.c_str()); - } - else - { - std::cerr << "Unsupported model type" << std::endl; + auto model = loadModel(filename, model_type); + if (model == nullptr) return NNFW_STATUS_ERROR; - } + _nnpkg = std::make_shared(std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); + _state = State::MODEL_LOADED; } catch (const std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique(_subgraphs.get()); - - _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -334,45 +344,59 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir) { auto filepath = package_path + std::string("/metadata/") + configs[0].asString(); - CfgKeyValues keyValues; + onert::util::CfgKeyValues keyValues; if (loadConfigure(filepath, keyValues)) { - setConfigKeyValues(keyValues); + onert::util::setConfigKeyValues(keyValues); } } - - auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model - auto model_type = model_types[0].asString(); // first model's type - if (model_type == "tflite") + _nnpkg = std::make_shared(); + for (uint32_t i = 0; i < models.size(); ++i) { - _subgraphs = onert::tflite_loader::loadModel(model_file_path); - } - else if (model_type == 
"circle") - { - _subgraphs = onert::circle_loader::loadModel(model_file_path); - } - else if (model_type == "tvn") - { - _subgraphs = onert::trix_loader::loadModel(model_file_path); + auto model_file_path = package_path + std::string("/") + models[i].asString(); + auto model_type = model_types[i].asString(); + auto model = loadModel(model_file_path, model_type); + if (model == nullptr) + return NNFW_STATUS_ERROR; + model->primary_subgraph()->bindKernelBuilder(_kernel_registry->getBuilder()); + _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model)); + _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig()); } - else + + auto toIODesc = [](std::string str) { + auto indices = nnfw::misc::split(str, ':'); + if (indices.size() != 3) + { + std::cerr << "IODesc should be 3-tuple." << std::endl; + return onert::ir::IODesc{}; + } + auto model_idx = static_cast(std::stoi(indices.at(0))); + auto subgraph_idx = static_cast(std::stoi(indices.at(1))); + auto operand_idx = static_cast(std::stoi(indices.at(2))); + return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx}; + }; + // read pkg-inputs and pkg-outputs + const Json::Value &pkg_inputs = root["pkg-inputs"]; + for (uint32_t i = 0; i < pkg_inputs.size(); ++i) + _nnpkg->addInput(toIODesc(pkg_inputs[i].asString())); + const Json::Value &pkg_outputs = root["pkg-outputs"]; + for (uint32_t i = 0; i < pkg_outputs.size(); ++i) + _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString())); + // read model-connect + const Json::Value &fromtos = root["model-connect"]; + for (uint32_t i = 0; i < fromtos.size(); ++i) { - std::cerr << "Unsupported model type in MANIFEST" << std::endl; - return NNFW_STATUS_ERROR; + const Json::Value &tos = fromtos[i]["to"]; + for (uint32_t j = 0; j < tos.size(); ++j) + _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString())); } - _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder()); + _state = State::MODEL_LOADED; } catch (const 
std::exception &e) { std::cerr << "Error during model loading : " << e.what() << std::endl; return NNFW_STATUS_ERROR; } - - _tracing_ctx = std::make_unique(_subgraphs.get()); - - _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); - - _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } @@ -396,9 +420,17 @@ NNFW_STATUS nnfw_session::prepare() try { - _subgraphs.reset(); - std::shared_ptr executors = _compiler->compile(); - _execution = std::make_unique(executors); + // TODO: Compile all models in case of multiple models + if (_nnpkg->model_count() > 2) + { + std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet." + << std::endl; + return NNFW_STATUS_ERROR; + } + auto compiler = std::make_unique(_nnpkg, _coptions); + _nnpkg.reset(); + _compiler_artifact = compiler->compile(); + _execution = std::make_unique(_compiler_artifact->_executors); } catch (const std::exception &e) { @@ -430,13 +462,14 @@ NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path) try { - _subgraphs.reset(); - std::vector> executor_maps = - _compiler->compile(_package_file_path.c_str(), map_file_path); + auto model = _nnpkg->primary_model(); + auto compiler = std::make_unique(model, *_coptions[0]); + _nnpkg.reset(); + auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path); - for (auto it = executor_maps.begin(); it != executor_maps.end(); ++it) + for (auto it = artifacts.begin(); it != artifacts.end(); ++it) { - _executions.push_back(std::make_shared(*it)); + _executions.push_back(std::make_shared(it->get()->_executors)); } make_dependency(); _threads.resize(_executions.size()); @@ -740,7 +773,8 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti) { // In this case, if we apply input shape in primary_subgraph, it will propagate after // compilation and excution - auto primary_subgraph = _subgraphs->primary(); + auto model = _nnpkg->primary_model(); + auto primary_subgraph = 
model->primary_subgraph(); auto ind = primary_subgraph->getInputs().at(index); auto &input = primary_subgraph->operands().at(ind); @@ -851,12 +885,12 @@ void nnfw_session::make_dependency() { for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++) { - auto out_graph = _executions[out_exe]->primary_subgraph(); + auto &out_graph = _executions[out_exe]->primary_subgraph(); for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++) { if (out_exe == in_exe) continue; - auto in_graph = _executions[in_exe]->primary_subgraph(); + auto &in_graph = _executions[in_exe]->primary_subgraph(); for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end(); out++) { @@ -971,7 +1005,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends) if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false) return NNFW_STATUS_ERROR; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; using namespace onert::util; @@ -1005,7 +1039,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend) return NNFW_STATUS_ERROR; } - auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend; + auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend; opcode_to_backend.emplace(onert::ir::toOpCode(key), backend); } catch (const std::exception &e) @@ -1024,7 +1058,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value) if (!key || !value) return NNFW_STATUS_UNEXPECTED_NULL; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; using namespace onert::util; @@ -1067,14 +1101,14 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value) const onert::ir::Graph *nnfw_session::primary_subgraph() { - if (_subgraphs) + if (_nnpkg != nullptr) { - assert(!_execution && _executions.empty()); - return _subgraphs->primary().get(); + assert(_execution == nullptr && _executions.empty()); + return 
_nnpkg->primary_model()->primary_subgraph().get(); } else { - assert(_execution || !_executions.empty()); + assert(_execution != nullptr || !_executions.empty()); // TODO Remove const_cast // We assumed the graph will not change after compilation, but shape could change if (!_executions.empty()) @@ -1094,7 +1128,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_ if (!key || !value) return NNFW_STATUS_UNEXPECTED_NULL; - auto &options = _compiler->options(); + auto &options = *_coptions[0]; auto check_boundary = [](size_t dest_size, std::string &src) { if (dest_size < src.length() + 1 /* for '\0' */) @@ -1138,9 +1172,9 @@ bool nnfw_session::isStateInitialized() { if (_state == State::INITIALIZED) { - assert(!_subgraphs); - assert(!_compiler); - assert(!_execution && _executions.empty()); + assert(_nnpkg == nullptr); + assert(_coptions.empty()); + assert(_execution == nullptr && _executions.empty()); return true; } else @@ -1153,9 +1187,9 @@ bool nnfw_session::isStateModelLoaded() { if (_state == State::MODEL_LOADED) { - assert(_subgraphs); - assert(_compiler); - assert(!_execution && _executions.empty()); + assert(_nnpkg != nullptr); + assert(!_coptions.empty()); + assert(_execution == nullptr && _executions.empty()); return true; } else @@ -1168,9 +1202,9 @@ bool nnfw_session::isStatePrepared() { if (_state == State::PREPARED) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } else @@ -1183,9 +1217,9 @@ bool nnfw_session::isStateRunning() { if (_state == State::RUNNING) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } return false; @@ -1195,9 +1229,9 @@ bool nnfw_session::isStateFinishedRun() 
{ if (_state == State::FINISHED_RUN) { - assert(!_subgraphs); - assert(_compiler); - assert(_execution || !_executions.empty()); + assert(_nnpkg == nullptr); + assert(!_coptions.empty()); + assert(_execution != nullptr || !_executions.empty()); return true; } else @@ -1224,9 +1258,14 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings) { if (backend_settings == NULL) - { return NNFW_STATUS_ERROR; - } - _compiler->set_backend_from_str(backend_settings); + + if (!isStateModelLoaded()) + return NNFW_STATUS_INVALID_STATE; + + // Backend for all + auto &ms_options = _coptions[0]->manual_scheduler_options; + ms_options.setBackendMap(std::string{backend_settings}); + return NNFW_STATUS_NO_ERROR; } diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h index 6d75d89..9b729fd 100644 --- a/runtime/onert/api/src/nnfw_api_internal.h +++ b/runtime/onert/api/src/nnfw_api_internal.h @@ -20,7 +20,6 @@ #include "nnfw.h" #include "nnfw_experimental.h" -#include #include #include @@ -41,11 +40,13 @@ class Execution; namespace ir { class Graph; -class Subgraphs; +class Model; +class NNPkg; } // namespace ir namespace compiler { -class Compiler; +struct CompilerArtifact; +class CompilerOptions; } // namespace compiler } // namespace onert @@ -97,9 +98,18 @@ private: }; public: + /** + * @brief Factory method. 
It creates and initialize nnfw_session + * + * @note Use factory instead of constructor to get status + */ + static NNFW_STATUS create(nnfw_session **session); + +private: nnfw_session(); - ~nnfw_session(); +public: + ~nnfw_session(); NNFW_STATUS load_model_from_nnpackage(const char *package_file_path); NNFW_STATUS prepare(); NNFW_STATUS prepare_pipeline(const char *map_file_path); @@ -148,6 +158,10 @@ public: NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index); NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index); + /** + * @brief Set backends with string-encoded mapping from operation index to backend type + * (cpu, acl_cl) + */ NNFW_STATUS set_backends_per_operation(const char *backend_settings); private: @@ -161,15 +175,14 @@ private: private: State _state{State::INITIALIZED}; - std::shared_ptr _subgraphs; - std::unique_ptr _compiler; + std::shared_ptr _nnpkg; + std::vector> _coptions; + std::shared_ptr _compiler_artifact; std::unique_ptr _execution; std::shared_ptr _kernel_registry; std::vector _threads; std::vector> _executions; std::string _package_file_path; - - std::unique_ptr _tracing_ctx; }; #endif // __API_NNFW_API_INTERNAL_H__ diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h index 945ad83..301ded0 100644 --- a/runtime/onert/backend/acl_cl/Backend.h +++ b/runtime/onert/backend/acl_cl/Backend.h @@ -46,8 +46,10 @@ public: { const auto &graph = *data.graph; const auto &operands = data.graph->operands(); + const auto is_linear_executor = data.is_linear_executor; + auto context = std::make_unique(this, std::move(data)); - auto tm = createTensorManager(data.is_linear_executor); + auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared>(tm); auto tb = std::make_shared(operands, tm); context->tensor_registry = tr; diff --git a/runtime/onert/backend/acl_neon/Backend.h 
b/runtime/onert/backend/acl_neon/Backend.h index 62b163b..1c77130 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -46,8 +46,10 @@ public: { const auto &graph = *data.graph; const auto &operands = data.graph->operands(); + const auto is_linear_executor = data.is_linear_executor; + auto context = std::make_unique(this, std::move(data)); - auto tm = createTensorManager(data.is_linear_executor); + auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared>(tm); auto tb = std::make_shared(operands, tm); context->tensor_registry = tr; diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index b61e582..99643b9 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -6,7 +6,7 @@ file(GLOB_RECURSE SOURCES "*.cc") add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES}) -target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker) +target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker nnfw_lib_misc) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index ab0bb5f..6ed4799 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -20,6 +20,8 @@ #include #include +#include + namespace onert { namespace backend diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 75274dc..762ee73 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -244,17 +244,13 @@ std::unique_ptr KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); 
assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 2255d5e..4672fe4 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -63,7 +63,7 @@ void ConvolutionLayer::convFloat32() getBuffer(_output)); } -void ConvolutionLayer::convQuant8() +void ConvolutionLayer::convQ8uPerTensor() { int32_t output_activation_min = 0; int32_t output_activation_max = 0; @@ -99,7 +99,33 @@ void ConvolutionLayer::convQuant8() getBuffer(_output)); } -void ConvolutionLayer::convQuant8PerChannel() +void ConvolutionLayer::convQ8uPerChannel() +{ + nnfw::cker::ConvParams op_params; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; + op_params.input_offset = -_input->data_zero_point(); + op_params.output_offset = _output->data_zero_point(); + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeQuantized(_activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + // NOTE: The following fields of 
ConvParams are not used: + // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max} + + nnfw::cker::Conv &kernel = *_conv_kernel; + kernel(op_params, getShape(_input), getBuffer(_input), getShape(_kernel), + getBuffer(_kernel), _kernel->data_zero_points().data(), getShape(_bias), + getBuffer(_bias), getShape(_output), getBuffer(_output)); +} + +void ConvolutionLayer::convQ8i() { int32_t output_activation_min = 0; int32_t output_activation_max = 0; @@ -189,11 +215,15 @@ void ConvolutionLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - convQuant8(); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + convQ8uPerChannel(); + else + convQ8uPerTensor(); } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { - convQuant8PerChannel(); + convQ8i(); } else { @@ -210,8 +240,8 @@ void ConvolutionLayer::prepare() if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant()) { bool is_transposed = false; - kernel.prepare(getShape(_kernel), getBuffer(_kernel), getPaddingType(_paddingType), - is_transposed, _dilationWidthFactor, _dilationHeightFactor); + kernel.prepareF32(getShape(_kernel), getBuffer(_kernel), getPaddingType(_paddingType), + is_transposed, _dilationWidthFactor, _dilationHeightFactor); // Decrease reference of _kernel(weights) only when _kernel is constant if (is_transposed) @@ -225,8 +255,20 @@ void ConvolutionLayer::prepare() else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic()) { - kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth, - _strideHeight, _dilationWidthFactor, _dilationHeightFactor); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + { + GetQuantizedConvolutionMultipliersAndShifts( + _input->data_scale(), _output->data_scale(), 
_kernel->data_scales().data(), + _kernel->data_scales().size(), getShape(_kernel).Dims(0), + kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift()); + } + else + { + kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output), + _strideWidth, _strideHeight, _dilationWidthFactor, + _dilationHeightFactor); + } } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h index 5d7f7c2..9f5253c 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h @@ -50,9 +50,10 @@ public: public: void convFloat32(); - void convQuant8(); + void convQ8uPerTensor(); + void convQ8uPerChannel(); - void convQuant8PerChannel(); + void convQ8i(); void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, ir::PaddingType _paddingType, diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index 30641ec..8a48497 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -49,7 +49,7 @@ void DepthwiseConvolutionLayer::convFloat32() getBuffer(_output), _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::convQuant8() +void DepthwiseConvolutionLayer::convQ8uPerTensor() { int32_t output_activation_min = 0; int32_t output_activation_max = 0; @@ -84,11 +84,39 @@ void DepthwiseConvolutionLayer::convQuant8() getBuffer(_output), _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::convQuant8PerChannel() +void DepthwiseConvolutionLayer::convQ8uPerChannel() +{ + nnfw::cker::DepthwiseConvParams op_params; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + 
op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidth; + op_params.dilation_height_factor = _dilationHeight; + op_params.depth_multiplier = _multiplier; + op_params.input_offset = -_input->data_zero_point(); + op_params.output_offset = _output->data_zero_point(); + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeQuantized(_activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + // NOTE: The following fields of ConvParams are not used: + // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max} + + nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel( + op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(), + getShape(_input), getBuffer(_input), getShape(_kernel), getBuffer(_kernel), + _kernel->data_zero_points().data(), getShape(_bias), getBuffer(_bias), + getShape(_output), getBuffer(_output)); +} + +void DepthwiseConvolutionLayer::convQ8i() { if (!_prepared) { - prepareQuant8PerChannel(); + prepareQ8i(); _prepared = true; } @@ -119,7 +147,15 @@ void DepthwiseConvolutionLayer::convQuant8PerChannel() _external_context->ruy_context()); } -void DepthwiseConvolutionLayer::prepareQuant8PerChannel() +void DepthwiseConvolutionLayer::prepareQ8i() +{ + GetQuantizedConvolutionMultipliersAndShifts( + _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(), + _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier, + _per_channel_output_shift); +} + +void DepthwiseConvolutionLayer::prepareQ8uPerChannel() { GetQuantizedConvolutionMultipliersAndShifts( _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(), @@ -155,7 +191,17 @@ void DepthwiseConvolutionLayer::configure( { if (_kernel->is_constant() && 
!_input->is_dynamic() && !_output->is_dynamic()) { - prepareQuant8PerChannel(); + prepareQ8i(); + _prepared = true; + } + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() && + !_input->is_dynamic() && !_output->is_dynamic()) + { + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + { + prepareQ8uPerChannel(); _prepared = true; } } @@ -169,11 +215,15 @@ void DepthwiseConvolutionLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - convQuant8(); + const bool per_channel_quantized = _kernel->data_scales().size() > 1; + if (per_channel_quantized) + convQ8uPerChannel(); + else + convQ8uPerTensor(); } else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM) { - convQuant8PerChannel(); + convQ8i(); } else { diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index 7205506..5c91010 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -40,9 +40,10 @@ public: public: void convFloat32(); - void convQuant8(); + void convQ8uPerTensor(); + void convQ8uPerChannel(); - void convQuant8PerChannel(); + void convQ8i(); void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, const uint32_t paddingLeft, @@ -55,7 +56,8 @@ public: void run() override; private: - void prepareQuant8PerChannel(); + void prepareQ8i(); + void prepareQ8uPerChannel(); private: const IPortableTensor *_input{nullptr}; diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc index 8a6fe65..d89741c 100644 --- a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc +++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc @@ -121,7 +121,9 @@ Array decodeBoxes(const Array &raw_boxes, const Array 
box.y1); } - return array_cast(std::move(decoded_boxes_a), decoded_boxes_a.shape()); + auto decoded_boxes_a_shape = decoded_boxes_a.shape(); + + return array_cast(std::move(decoded_boxes_a), decoded_boxes_a_shape); } } diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h index 3cc4eaa..c73ae63 100644 --- a/runtime/onert/backend/ruy/ExternalContext.h +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -20,6 +20,8 @@ #include #include +#include + namespace onert { namespace backend diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc index c2f6a1f..b2bbf9b 100644 --- a/runtime/onert/backend/ruy/KernelGenerator.cc +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -42,17 +42,13 @@ std::unique_ptr KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt index 5455757..a94be24 100644 --- a/runtime/onert/backend/trix/CMakeLists.txt +++ b/runtime/onert/backend/trix/CMakeLists.txt @@ -1,6 +1,6 @@ set(LIB_ONERT_BACKEND_TRIX onert_backend_trix) -nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +nnfw_find_package(TRIXEngine QUIET 2.5.0) if(NOT TRIXEngine_FOUND) return() endif(NOT TRIXEngine_FOUND) diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h index 
482932f..a7dbd7a 100644 --- a/runtime/onert/backend/trix/DevContext.h +++ b/runtime/onert/backend/trix/DevContext.h @@ -32,28 +32,42 @@ public: DevContext() { auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP); + // TODO: x64 platform has 3 cores. We do not support more that 2 cores for now. + if (device_count > 2) + { + device_count = 2; + } + if (device_count <= 0) { - throw std::runtime_error("Unable to find TRIV2 NPU device"); + throw std::runtime_error("Unable to find TRIX NPU device"); } - // Use NPU 0 device - if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0) + for (int i = 0; i < device_count; i++) { - throw std::runtime_error("Failed to get TRIV2 NPU device handle"); + npudev_h h; + if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0) + { + throw std::runtime_error("Failed to get TRIX NPU device handle"); + } + _dev_handles.push_back(h); } } ~DevContext() { - if (_dev_handle != nullptr) + for (auto h : _dev_handles) { - unregisterNPUmodel_all(_dev_handle); - putNPUdevice(_dev_handle); + if (h != nullptr) + { + unregisterNPUmodel_all(h); + putNPUdevice(h); + } } } - npudev_h getDev() { return _dev_handle; } + npudev_h getDev(int i) { return _dev_handles[i]; } + int getDevSize() { return _dev_handles.size(); } template void setDataInfo(tensors_data_info *info, std::vector &tensors) { @@ -66,14 +80,15 @@ public: } } - template void setBuffer(generic_buffers *buf, std::vector &tensors) + template + void setBuffer(generic_buffers *buf, std::vector &tensors, int batch_size, int batch_index) { buf->num_buffers = static_cast(tensors.size()); for (uint32_t idx = 0; idx < buf->num_buffers; ++idx) { - buf->bufs[idx].addr = tensors[idx]->buffer(); - buf->bufs[idx].size = static_cast(tensors[idx]->total_size()); + buf->bufs[idx].size = static_cast(tensors[idx]->total_size() / batch_size); + buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size); buf->bufs[idx].type = BUFFER_MAPPED; } } @@ -106,9 
+121,8 @@ private: } private: - // NPU device handle - // TODO Support multicore npu device - npudev_h _dev_handle; + // NPU device handles + std::vector _dev_handles; }; } // namespace trix diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc index 71fdf3f..3c49da9 100644 --- a/runtime/onert/backend/trix/ops/BulkLayer.cc +++ b/runtime/onert/backend/trix/ops/BulkLayer.cc @@ -18,6 +18,7 @@ #include #include +#include namespace onert { @@ -49,24 +50,56 @@ void BulkLayer::configure(const std::vector &inputs, throw std::runtime_error("Unable to extract the model metadata"); } + _model_id.resize(_dev_context->getDevSize()); + generic_buffer model_file; model_file.type = BUFFER_FILE; model_file.filepath = binary_path.c_str(); model_file.size = _meta->size; - if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0) + for (int i = 0; i < _dev_context->getDevSize(); i++) + { + if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0) + { + throw std::runtime_error("Failed to register npu model"); + } + } +} + +void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info, + output_buffers *output_buf, tensors_data_info *out_info) +{ + if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info)) + { + throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) + + ")"); + } + + if (submitNPU_request(dev, req_id)) { - throw std::runtime_error("Failed to register npu model"); + throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) + + ")"); } } void BulkLayer::run() { - int req_id; - if (createNPU_request(_dev_context->getDev(), _model_id, &req_id)) + // TODO: Remove too many assumption + // We assume user wants batch execution if user's input size is multiples of model's input size + int user_input_batch = (_inputs[0]->get_info().shape()).dim(0); + int 
model_input_batch = _meta->input_seg_dims[0][0]; + int batch_size = user_input_batch / model_input_batch; + bool is_batch_execution = (batch_size != 1 ? true : false); + + std::vector req_id(_dev_context->getDevSize()); + + for (int i = 0; i < _dev_context->getDevSize(); i++) { - throw std::runtime_error("Unable to create NPU request with model id (" + - std::to_string(_model_id) + ")"); + if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i])) + { + throw std::runtime_error("Unable to create NPU request with model id (" + + std::to_string(_model_id[i]) + ")"); + } } if (_meta->input_seg_num != _inputs.size()) @@ -84,28 +117,58 @@ void BulkLayer::run() _dev_context->setDataInfo(&in_info, _inputs); _dev_context->setDataInfo(&out_info, _outputs); - input_buffers input_buf; - output_buffers output_buf; - _dev_context->setBuffer(&input_buf, _inputs); - _dev_context->setBuffer(&output_buf, _outputs); + std::vector input_buf; + std::vector output_buf; + input_buf.resize(_dev_context->getDevSize()); + output_buf.resize(_dev_context->getDevSize()); + + std::vector> f(_dev_context->getDevSize()); - if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf, - &out_info)) + const int num_cores = _dev_context->getDevSize(); + if (is_batch_execution) { - throw std::runtime_error("Unable to create NPU request for model id (" + - std::to_string(_model_id) + ")"); + // TODO: Support for general number of cores(>2) + // Here we assume that 2 trix cores + for (int i = 0; i < (batch_size); i = i + num_cores) + { + for (int core = 0; core < num_cores; core++) + { + _dev_context->setBuffer(&input_buf[core], _inputs, batch_size, + i + core); + _dev_context->setBuffer(&output_buf[core], _outputs, batch_size, i + core); + } + for (int core = 0; core < num_cores; core++) + { + + if (i + core < batch_size) + { + f[core] = + std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core], + &input_buf[core], &in_info, 
&output_buf[core], &out_info); + } + } + for (int core = 0; core < num_cores; core++) + { + f[core].wait(); + } + } } - - if (submitNPU_request(_dev_context->getDev(), req_id)) + else { - throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) + - ")"); + _dev_context->setBuffer(&input_buf[0], _inputs, batch_size, 0); + _dev_context->setBuffer(&output_buf[0], _outputs, batch_size, 0); + + single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0], + &out_info); } - if (removeNPU_request(_dev_context->getDev(), req_id)) + for (int i = 0; i < _dev_context->getDevSize(); i++) { - throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) + - ")"); + if (removeNPU_request(_dev_context->getDev(i), req_id[i])) + { + throw std::runtime_error("Unable to remove NPU request with req id (" + + std::to_string(req_id[i]) + ")"); + } } } diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h index f7080cc..614c0f7 100644 --- a/runtime/onert/backend/trix/ops/BulkLayer.h +++ b/runtime/onert/backend/trix/ops/BulkLayer.h @@ -50,7 +50,7 @@ private: std::vector _inputs; std::vector _outputs; - uint32_t _model_id; + std::vector _model_id; npubin_meta *_meta; std::shared_ptr _dev_context; }; diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc index 28f729d..9580bec 100644 --- a/runtime/onert/backend/xnnpack/KernelGenerator.cc +++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc @@ -56,17 +56,13 @@ std::unique_ptr KernelGenerator::generate(ir::OperationI assert(_tensor_builder->dynamicTensorManager()); assert(_tensor_reg); - auto dyn_shape_inferer = std::make_shared(_ctx, _tensor_reg); - // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_operations_ctx; - dyn_ctx->dynamic_shape_inferer = 
std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_operations_ctx.at(ind); + dyn_ctx->dynamic_shape_inferer = std::make_shared(_ctx, _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt index 6dbadf8..87c7a13 100644 --- a/runtime/onert/core/CMakeLists.txt +++ b/runtime/onert/core/CMakeLists.txt @@ -6,14 +6,18 @@ nnfw_find_package(Ruy REQUIRED) add_library(onert_core SHARED ${SOURCES}) set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON) + +# NOTE +# We publish public headers into developer package. +# To avoid mistake using private header in public header, do not define +# private target_include_directories scope for src/ directory. target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) -target_link_libraries(onert_core PUBLIC nnfw_lib_misc half) -target_link_libraries(onert_core PRIVATE nnfw_lib_cker) + +target_link_libraries(onert_core PRIVATE jsoncpp half) +target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker) target_link_libraries(onert_core PRIVATE nnfw_common) target_link_libraries(onert_core PRIVATE nnfw_coverage) target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD}) -target_link_libraries(onert_core PRIVATE jsoncpp) target_link_libraries(onert_core PRIVATE ruy) target_link_libraries(onert_core INTERFACE ruy_instrumentation) @@ -48,6 +52,8 @@ set(TEST_ONERT_CORE test_onert_core) add_executable(${TEST_ONERT_CORE} ${TESTS}) target_link_libraries(${TEST_ONERT_CORE} onert_core) +# Requires linking nnfw_coverage: check header coverage +target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage) target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD}) add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE}) diff --git 
a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h index 0a4d9c8..5604162 100644 --- a/runtime/onert/core/include/backend/ITensor.h +++ b/runtime/onert/core/include/backend/ITensor.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "ir/DataType.h" #include "ir/Layout.h" diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h index 58bfe34..cf2da4c 100644 --- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h +++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h @@ -103,7 +103,7 @@ template void planTensors(const T_BackendContext &ct // 1. Scan DEF of outputs. If the DEF, allocate it // 2. Scan DEF of inputs. If variable tensor, allocate it // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : order) + for (const auto &op_ind : order) { const auto &op = graph.operations().at(op_ind); auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; @@ -161,7 +161,7 @@ template void planTensors(const T_BackendContext &ct } } - for (auto ind : operands_last_until_end) + for (auto &ind : operands_last_until_end) { tensor_builder->notifyLastUse(ind); } diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h index befe400..b44fcf8 100644 --- a/runtime/onert/core/include/compiler/BackendManager.h +++ b/runtime/onert/core/include/compiler/BackendManager.h @@ -17,12 +17,11 @@ #ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__ #define __ONERT_COMPILER_BACKEND_MANAGER_H__ -#include -#include - -#include "ir/Operands.h" #include "backend/Backend.h" -#include "backend/builtin/Backend.h" +#include "ir/Operands.h" + +#include +#include namespace onert { @@ -41,7 +40,7 @@ public: public: backend::Backend *get(const std::string &key); const backend::Backend *get(const std::string &key) 
const; - const backend::builtin::Backend *getBuiltin() const; + const backend::Backend *getBuiltin() const; const std::vector getAll() const { std::vector v; @@ -65,7 +64,7 @@ private: private: std::map> _handle_map; std::map> _gen_map; - backend::builtin::Backend *_builtin{nullptr}; + backend::Backend *_builtin{nullptr}; /** * @brief load builtin backend * diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h index 292de4b..f05d63c 100644 --- a/runtime/onert/core/include/compiler/Compiler.h +++ b/runtime/onert/core/include/compiler/Compiler.h @@ -22,8 +22,8 @@ #ifndef __ONERT_COMPILER_COMPILE_H_ #define __ONERT_COMPILER_COMPILE_H_ -#include "ir/Graph.h" -#include "exec/IExecutor.h" +#include "ir/NNPkg.h" +#include "exec/Executors.h" #include "util/TracingCtx.h" namespace onert @@ -40,6 +40,10 @@ enum class State struct ManualSchedulerOptions { +public: + void setBackendMap(const std::string &str); + +public: std::string backend_for_all; std::unordered_map opcode_to_backend; std::unordered_map index_to_backend; @@ -50,8 +54,14 @@ struct PartialGraphOptions std::unordered_map index_to_graph; }; -struct CompilerOptions +class CompilerOptions { +public: + // Set default values for CompilerOptions + // All these default values should not be fetched from Env, when we stop supporting Android NNAPI. 
+ static std::unique_ptr fromGlobalConfig(); + +public: // GENERAL OPTIONS std::vector backend_list; @@ -65,75 +75,85 @@ struct CompilerOptions bool disable_compile; //< Run with Interpreter if true, try compilation otherwise bool fp16_enable; //< Whether fp16 mode ON/OFF PartialGraphOptions partial_graph_options; - - util::TracingCtx *tracing_ctx; //< Profiling information }; -CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs); +struct CompilerArtifact +{ + CompilerArtifact(void) = delete; + CompilerArtifact(std::shared_ptr executors, + std::unique_ptr tracing_ctx) + : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {}; + + std::shared_ptr _executors; + std::unique_ptr _tracing_ctx; +}; /** - * @brief Class to compile graph model + * @brief Class to compile NN package */ class Compiler { public: /** - * @brief Construct a new Compiler object - * @param[in] subgs All subgraphs of a model - * @param[in] tracing_ctx Profiling information + * @brief Construct a new Compiler object for single model + * @param[in] model model to compile + * @param[in] coptions Compiler Options + */ + Compiler(const std::shared_ptr &model, CompilerOptions &copt); + + /** + * @brief Construct a new Compiler object for NN package + * @param[in] nnpkg NN package to compile + * @param[in] coptions Compiler option vector for each model in package */ - Compiler(const std::shared_ptr &subgs, util::TracingCtx *tracing_ctx); + Compiler(const std::shared_ptr &nnpkg, + std::vector> &copts); public: /** * @brief Do compilation with the options * - * @return std::shared_ptr Executors as a result of compilation + * @return std::shared_ptr Executors as a result of compilation */ - std::shared_ptr compile(void); + std::shared_ptr compile(void); /** * @brief Do compilation with the options * - * @return std::vector> Executors as a result of compilation + * @return std::vector> Executors as a result of compilation * for pipeline */ - std::vector> compile(const 
char *package_file_path, - const char *map_file_path); + std::vector> compile(const char *package_file_path, + const char *map_file_path); State state(void) const { return _state; } - CompilerOptions &options() { return _options; } - /** * @brief Allow to compute float32 using float16 data type */ void enableToFp16(); /** - * @brief Set backends from string-encoded mappings from operation index to backend type (cpu, - * acl_cl) - */ - void set_backend_from_str(const char *backend_settings); - - /** * @brief Build the partial graphs to compile with original graph */ bool buildPartialGraph(uint32_t num_graphs); private: void checkProfilerConditions(); - std::shared_ptr &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); } + std::shared_ptr &primary_subgraph() + { + return _nnpkg->primary_model()->at(ir::SubgraphIndex{0}); + } private: - std::shared_ptr _subgraphs; + std::shared_ptr _nnpkg; // NOTE These executors does not have duplicated subgraph. This mean they do not allow support // subgraphs being called recursively because data of non-constant tensor of parent executor will // be updated by child executor. If you want to support subgraphs being called recursively, you // have to add allocate non-constant tensor memory of executors in execution time when each // subgraph is called. 
State _state; - CompilerOptions _options; + std::vector _voptions; }; } // namespace compiler diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h index 10ca8e9..7264f2a 100644 --- a/runtime/onert/core/include/compiler/LoweredGraph.h +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -60,9 +60,14 @@ public: private: void makeLowerInfo(const compiler::BackendResolver &backend_resolver); void dumpLowerInfo(); - void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options); + void lowerGraph(const compiler::CompilerOptions &options); private: + /** + * @brief Copy of target graph for lowering + * @note It uses copy of graph, not reference. + * It allows the original graph can be compiled multiple times. + */ ir::Graph _graph; ir::Graph _parent_graph; std::shared_ptr> _indexed_ranks; diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h index b2272e2..f701dc2 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInferer.h +++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h @@ -28,6 +28,36 @@ namespace onert { namespace compiler { +/** + * @brief Class that observe and update operands. + */ +class OperandObserver +{ +public: + /** + * @brief Constructor of OperandObserver + * + * @param operands Operands to be updated + */ + OperandObserver(const std::vector &operands) : _operands{operands} {} + /** + * @brief Destructor of OperandObserver + */ + virtual ~OperandObserver() = default; + +public: + /** + * @brief Update Shape and some OperandInfo of operands + * + * @param operands Operands to be updated + * @param unpredictable Whether runtime can predict shapes of operands in compilation time + */ + void updateShapes(const std::vector &changed_operands_info, + bool unpredictable = false); + +private: + std::vector _operands; +}; /** * @brief Class to infer shape before running kernels. 
It does the following: @@ -38,32 +68,42 @@ namespace compiler class StaticShapeInferer : public ir::OperationVisitor { public: - StaticShapeInferer( - const ir::SubgraphIndex &subg_idx, - const std::unordered_map> - &lowered_subgs) - : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()), - _operations(lowered_subgs.at(subg_idx)->graph().operations()), - _return_has_dynamic_tensor(false) - { /* empty */ + StaticShapeInferer(compiler::LoweredGraph *lowered_subg) + : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr}, + _child_inferers{} + { } virtual ~StaticShapeInferer() = default; public: + void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx, + std::unique_ptr &&subg_input_observer) noexcept + { + _subg_input_observers[subg_idx] = std::move(subg_input_observer); + } + + void setControlflowOutputObserver(std::unique_ptr &&output_observer) noexcept + { + _controlflow_output_observer = std::move(output_observer); + } + + void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer) + { + _child_inferers[subg_idx] = inferer; + } + /** - * @brief Infer shape of operands beloning to ops and set the output shape. + * @brief Infer shape of operands belonging to ops and set the output shape. * If output shape cannot be known without running op, mark it so that it can be allocated * when running kernel. - * @param op Operation - * @return @c true if op's input or output has any dynamic tensor; @c false otherwise. 
*/ - bool infer(const ir::Operation &op); + void infer(void); void dump(); private: - void inferSubgraph(ir::SubgraphIndex subg_ind); bool checkDynamicInput(const ir::Operation &op); + bool checkDynamicOutput(const ir::Operation &op); void setDynamicOutput(const ir::Operation &op); private: @@ -113,6 +153,7 @@ private: void visit(const ir::operation::Unpack &op) override; void visit(const ir::operation::While &op) override; void visit(const ir::operation::DetectionPostProcess &op) override; + void visit(const ir::operation::Bulk &op) override; private: /** @@ -128,12 +169,11 @@ private: void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx); private: - const std::unordered_map> - &_lowered_subgs; - // _operands and _operations can be changed by controlflow operation - ir::Operands &_operands; // operands of current subgraph - ir::Operations &_operations; // operations of current subgraph - bool _return_has_dynamic_tensor; + compiler::LoweredGraph *_lowered_subg; + std::unordered_map> + _subg_input_observers; // child subg input + std::unique_ptr _controlflow_output_observer; // parent controlflow op output + std::unordered_map _child_inferers; }; } // namespace compiler diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h index b0a5cce..1e8083c 100644 --- a/runtime/onert/core/include/exec/Execution.h +++ b/runtime/onert/core/include/exec/Execution.h @@ -22,7 +22,7 @@ #define __ONERT_EXEC_EXECUTION_H__ #include "ir/Layout.h" -#include "exec/IExecutor.h" +#include "exec/Executors.h" #include "IODescription.h" #include @@ -46,7 +46,7 @@ public: * @brief Construct a new Execution object * @param[in] executor Model executor */ - Execution(const std::shared_ptr &executors); + Execution(const std::shared_ptr &executors); public: /** @@ -250,7 +250,7 @@ private: std::unique_ptr &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); }; private: - const std::shared_ptr _executors; + 
const std::shared_ptr _executors; IODescription _io_desc; std::deque> _async_io_descs; sem_t _async_io_descs_sem; diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h new file mode 100644 index 0000000..5adb0ed --- /dev/null +++ b/runtime/onert/core/include/exec/Executors.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_EXEC_EXECUTORS_H__ +#define __ONERT_EXEC_EXECUTORS_H__ + +#include "IExecutor.h" +#include "ir/NNPkg.h" + +namespace onert +{ +namespace exec +{ + +/** + * @brief Class to gather executors + */ +class Executors +{ +public: + Executors(void) = default; + Executors(std::unique_ptr model_edges) { _model_edges = std::move(model_edges); } + Executors(const Executors &) = delete; + Executors(Executors &&) = default; + + // TODO Use Executor index + void emplace(ir::SubgraphIndex idx, std::unique_ptr exec) + { + _executors.emplace(idx, std::move(exec)); + } + + std::unique_ptr &at(ir::SubgraphIndex idx) { return _executors.at(idx); } + + uint32_t inputSize() const; + + uint32_t outputSize() const; + + const ir::OperandInfo inputInfo(const ir::IOIndex &index); + + const ir::OperandInfo outputInfo(const ir::IOIndex &index); + + void execute(const IODescription &desc); + +private: + void executeEntries(const IODescription &desc); + +private: + // TODO Use Executor index + // Changing 
index will effect if/while compile and kernel implementation + std::unordered_map> _executors; + // NOTE _model_edges may use different struct type for executor implementation + std::unique_ptr _model_edges; +}; + +} // namespace exec +} // namespace onert + +#endif // __ONERT_EXEC_EXECUTORS_H__ diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h index cf3f2a8..7ff6d8b 100644 --- a/runtime/onert/core/include/exec/FunctionSequence.h +++ b/runtime/onert/core/include/exec/FunctionSequence.h @@ -75,8 +75,7 @@ public: public: // methods related to dynamic tensor struct DynamicTensorCtx { - ir::OperationIndex op_ind; - const ir::Operations *operations = nullptr; + const ir::Operation *op = nullptr; std::shared_ptr dynamic_shape_inferer = nullptr; }; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index adc6807..bb5b5af 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -107,8 +107,6 @@ struct IExecutor virtual const std::vector &getOutputTensors() const = 0; }; -using ExecutorMap = std::unordered_map>; - } // namespace exec } // namespace onert diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h index 7a76883..286caf7 100644 --- a/runtime/onert/core/include/ir/Graph.h +++ b/runtime/onert/core/include/ir/Graph.h @@ -20,9 +20,9 @@ #include #include +#include "ir/Model.h" #include "ir/Operands.h" #include "ir/Operations.h" -#include "ir/Subgraphs.h" namespace onert { @@ -50,7 +50,9 @@ private: }; public: - Graph(void); + explicit Graph(void); + explicit Graph(const Graph &); + ~Graph(void); // Graph Building @@ -87,10 +89,9 @@ public: void verify(void); void removeOperand(const OperandIndex &ind) { _operands.remove(ind); } void setLayout(Layout layout) { _layout = layout; } - void setSubgraphs(const std::shared_ptr &subgs) { _subgraphs = subgs; } - void 
setPartialgraphs(const std::shared_ptr &partialgraphs) + void setPartialModel(const std::shared_ptr &partial_model) { - _partialgraphs = partialgraphs; + _partialgraphs = partial_model; } void setTensorName(std::shared_ptr> &tensor_names) @@ -134,27 +135,25 @@ public: Operands &operands() { return _operands; } // TODO Remove this non-const accessor const Operations &operations() const { return _operations; } Operations &operations() { return _operations; } - const std::shared_ptr &subgraphs() const { return _subgraphs; } - std::shared_ptr &subgraphs() { return _subgraphs; } Layout layout() const { return _layout; } - std::shared_ptr &partialgraphs() { return _partialgraphs; } + std::shared_ptr &partialgraphs() { return _partialgraphs; } std::shared_ptr> &tensor_names() { return _tensor_names; } - std::unordered_map::iterator _name_to_input_begin() + std::unordered_map::const_iterator _name_to_input_begin() const { return _name_to_input.begin(); } - std::unordered_map::iterator _name_to_input_end() + std::unordered_map::const_iterator _name_to_input_end() const { return _name_to_input.end(); } - std::unordered_map::iterator _name_to_output_begin() + std::unordered_map::const_iterator _name_to_output_begin() const { return _name_to_output.begin(); } - std::unordered_map::iterator _name_to_output_end() + std::unordered_map::const_iterator _name_to_output_end() const { return _name_to_output.end(); } @@ -172,13 +171,11 @@ private: OperandIndexSequence _outputs; std::unordered_map _name_to_input; std::unordered_map _name_to_output; - // Child subgraphs - std::shared_ptr _subgraphs; // TFLite and circle's default layout is NHWC; Layout _layout{Layout::NHWC}; - // Partial Graphs - std::shared_ptr _partialgraphs; + // model for partial graphs + std::shared_ptr _partialgraphs; std::shared_ptr> _tensor_names; }; diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h index e01b090..f01a4c8 100644 --- a/runtime/onert/core/include/ir/Index.h 
+++ b/runtime/onert/core/include/ir/Index.h @@ -38,6 +38,9 @@ using IOIndex = ::onert::util::Index; struct SubgraphIndexTag; using SubgraphIndex = ::onert::util::Index; +struct ModelIndexTag; +using ModelIndex = ::onert::util::Index; + template std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index) { @@ -64,7 +67,12 @@ inline std::ostream &operator<<(std::ostream &o, const IOIndex &i) inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i) { - return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph + return _index_print_impl(o, "SUBGRAPH", i); +} + +inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i) +{ + return _index_print_impl(o, "MODEL", i); } } // namespace ir diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h index 0828101..0cdbcc2 100644 --- a/runtime/onert/core/include/ir/Layout.h +++ b/runtime/onert/core/include/ir/Layout.h @@ -18,6 +18,7 @@ #define __ONERT_IR_LAYOUT_H__ #include +#include #include namespace onert diff --git a/runtime/onert/core/include/ir/Model.h b/runtime/onert/core/include/ir/Model.h new file mode 100644 index 0000000..c3c0d87 --- /dev/null +++ b/runtime/onert/core/include/ir/Model.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_IR_MODEL_H__ +#define __ONERT_IR_MODEL_H__ + +#include +#include + +#include "ir/Index.h" +#include "util/ObjectManager.h" + +namespace onert +{ +namespace ir +{ + +class Graph; + +class Model +{ +public: + Model() = default; + Model(const Model &obj) = default; + Model(Model &&) = default; + Model &operator=(const Model &) = default; + Model &operator=(Model &&) = default; + ~Model() = default; + + /** + * @brief Put subgraph in the container with a new Index for that + * + * @param[in] subg Subgraph to be pushed + * @param[in] index Index of subgraph to be pushed + * @return Created + */ + void push(SubgraphIndex index, const std::shared_ptr &subg) { _subgraphs[index] = subg; } + + /** + * @brief Remove the subgraph that is associated with the given index + * + * @param[in] index Index of the subgraph to be removed + * @return N/A + */ + void remove(const SubgraphIndex &index) { _subgraphs.erase(index); } + + /** + * @brief Get the subgraph that is associated with the given index + * + * @param[in] index Index of the subgraph to be returned + * @return Graph + */ + const std::shared_ptr &at(const SubgraphIndex &index) const + { + return _subgraphs.at(index); + } + /** + * @brief Get the subgraph that is associated with the given index + * + * @param[in] index Index of the subgraph to be returned + * @return Graph + */ + std::shared_ptr &at(const SubgraphIndex &index) { return _subgraphs.at(index); } + + /** + * @brief Get the subgraph that is associated with the given index + * + * @param[in] index Index of the subgraph to be returned + * @return true if such entry exists otherwise false + */ + bool exist(const SubgraphIndex &index) const + { + auto it = _subgraphs.find(index); + return it != _subgraphs.end(); + } + + /** + * @brief Iterate over the container with given function + * + * @param[in] fn Function to be run for every container entry + * @return N/A + */ + void iterate(const std::function &fn) const + { + for (const auto &e : 
_subgraphs) + { + fn(e.first, *e.second); + } + } + + /** + * @brief Iterate over the container with given function + * + * @param[in] fn Function to be run for every container entry + * @return N/A + */ + void iterate(const std::function &fn) + { + for (const auto &e : _subgraphs) + { + fn(e.first, *e.second); + } + } + + /** + * @brief Get count of Subgraphs + * + * @return count of Subgraphs + */ + size_t subgraphs_count() const { return _subgraphs.size(); } + + /** + * @brief Return the primary subgraph + * + * @return std::shared_ptr Primary subgraph + */ + std::shared_ptr primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); } + +private: + std::unordered_map> _subgraphs; +}; + +} // namespace ir +} // namespace onert + +#endif // __ONERT_IR_MODEL_H__ diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h new file mode 100644 index 0000000..d9f825e --- /dev/null +++ b/runtime/onert/core/include/ir/NNPkg.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __ONERT_IR_NNPKG_H__
+#define __ONERT_IR_NNPKG_H__
+
+// NOTE(review): the angle-bracket contents of this hunk (system header names and template
+// arguments) were lost when the patch text was extracted. They are restored below from the way
+// each name is used in this header - confirm against the upstream 1.21.0 sources.
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Descriptor of one model I/O, made of three index values
+ *
+ * NOTE(review): presumably (model index, subgraph index, I/O index); the element types were
+ * stripped from the patch text and are reconstructed here - confirm.
+ */
+using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>;
+
+/**
+ * @brief Directed edge between two model I/Os
+ */
+struct ModelEdge
+{
+  IODesc from; ///< Start point of the edge
+  IODesc to;   ///< Finish point of the edge
+};
+
+/**
+ * @brief Equality functor for ModelEdge: both end points must compare equal
+ */
+struct ModelEdgeEqual
+{
+  bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const
+  {
+    return lhs.from == rhs.from && lhs.to == rhs.to;
+  }
+};
+
+/**
+ * @brief Hash functor for ModelEdge
+ *
+ *        The three indices of `from` and then the three indices of `to` are folded into the
+ *        hash one by one. Each index is widened to 64 bits before any shift is applied, so no
+ *        bits are dropped by shifting in a narrower type, and the fold is order-sensitive, so
+ *        an edge and its reverse are not forced onto the same hash value.
+ */
+struct ModelEdgeHash
+{
+  size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept
+  {
+    // Fold one more value into the running hash (golden-ratio mixing step)
+    const auto combine = [](unsigned long long seed, unsigned long long value) noexcept {
+      return seed ^ (value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
+    };
+    // Fold the three indices of one descriptor, widening each one first
+    const auto fold = [&combine](unsigned long long seed, const IODesc &desc) noexcept {
+      seed = combine(seed, static_cast<unsigned long long>(std::get<0>(desc).value()));
+      seed = combine(seed, static_cast<unsigned long long>(std::get<1>(desc).value()));
+      return combine(seed, static_cast<unsigned long long>(std::get<2>(desc).value()));
+    };
+    return static_cast<size_t>(fold(fold(0ULL, edge.from), edge.to));
+  }
+};
+
+/**
+ * @brief Print an IODesc as "<0>:<1>:<2>", i.e. its three index values separated by colons
+ */
+inline std::ostream &operator<<(std::ostream &o, const IODesc &od)
+{
+  o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value();
+  return o;
+}
+
+using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>;
+
+/**
+ * @brief Struct to gather model I/O information of a multi-model NN package
+ *        Each model I/O plays one of the roles below
+ *        - Package input/output
+ *        - Start/finish point of an edge between models
+ */
+struct ModelEdges
+{
+  std::vector<ir::IODesc> pkg_inputs;
+  std::vector<ir::IODesc> pkg_outputs;
+  ModelEdgeSet edges;
+};
+
+/**
+ * @brief Container of the models of a package, together with the package inputs/outputs and
+ *        the edges between the models
+ */
+class NNPkg
+{
+public:
+  NNPkg() = default;
+  NNPkg(const NNPkg &obj) = default;
+  NNPkg(NNPkg &&) = default;
+  NNPkg &operator=(const NNPkg &) = default;
+  NNPkg &operator=(NNPkg &&) = default;
+  ~NNPkg() = default;
+
+  /**
+   * @brief Create a package that holds @c model at ModelIndex{0}
+   *
+   * @param[in] model Model to be stored at index 0
+   */
+  NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+  /**
+   * @brief Get the model stored at ModelIndex{0}
+   *
+   * @return Model at index 0 (the lookup uses at(), so it throws when there is no such model)
+   */
+  std::shared_ptr<Model> primary_model() const { return _models.at(onert::ir::ModelIndex{0}); }
+
+  /**
+   * @brief Put model at index, replacing the model already stored there if any
+   *
+   * @param[in] index Index where Model is to be pushed
+   * @param[in] model Model to be pushed
+   */
+  void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; }
+
+  /**
+   * @brief Get the count of models
+   *
+   * @return the count of models
+   */
+  size_t model_count() const { return _models.size(); }
+
+  /**
+   * @brief Get model at index
+   *
+   * @param[in] index Index of the model to be returned
+   * @return Model at index (the lookup uses at(), so it throws when there is no such model)
+   */
+  const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); }
+  /**
+   * @brief Get model at index
+   *
+   * @param[in] index Index of the model to be returned
+   * @return Model at index (the lookup uses at(), so it throws when there is no such model)
+   */
+  std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); }
+
+  /**
+   * @brief Get pkg_input at index
+   *
+   * @param[in] index Index of pkg_input to be returned (not range-checked)
+   * @return IODesc at index
+   */
+  const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; }
+  /**
+   * @brief Get pkg_input at index
+   *
+   * @param[in] index Index of pkg_input to be returned (not range-checked)
+   * @return IODesc at index
+   */
+  IODesc &input(uint32_t index) { return _edges.pkg_inputs[index]; }
+  /**
+   * @brief Add input at the end
+   *
+   * @param[in] input Input IODesc to be pushed
+   */
+  void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); }
+
+  /**
+   * @brief Get pkg_output at index
+   *
+   * @param[in] index Index of pkg_output to be returned (not range-checked)
+   * @return IODesc at index
+   */
+  const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; }
+  /**
+   * @brief Get pkg_output at index
+   *
+   * @param[in] index Index of pkg_output to be returned (not range-checked)
+   * @return IODesc at index
+   */
+  IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; }
+  /**
+   * @brief Add output at the end
+   *
+   * @param[in] output Output IODesc to be pushed
+   */
+  void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); }
+
+  /**
+   * @brief Add edge between models
+   *
+   *        The edge is only recorded. Nothing is written to standard output; callers that
+   *        want a trace can stream the two IODesc values themselves.
+   *
+   * @param[in] from from IODesc
+   * @param[in] to   to IODesc
+   */
+  void addEdge(const IODesc &from, const IODesc &to)
+  {
+    // The edge set ignores an edge that is already present
+    _edges.edges.insert(ModelEdge{from, to});
+  }
+  /**
+   * @brief Get model edge set
+   * @return Reference to the package inputs, package outputs and edges
+   */
+  const ModelEdges &model_edges() const { return _edges; }
+
+  // TODO: Add iterate() or getter for edges
+
+private:
+  std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models;
+  ModelEdges _edges;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_NNPKG_H__
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h
deleted file mode 100644
index 6cb3694..0000000
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#ifndef __ONERT_IR_SUBGRAPHS_H__ -#define __ONERT_IR_SUBGRAPHS_H__ - -#include -#include - -#include "ir/Index.h" -#include "util/ObjectManager.h" - -namespace onert -{ -namespace ir -{ - -class Graph; - -class Subgraphs -{ -public: - Subgraphs() = default; - Subgraphs(const Subgraphs &obj) = default; - Subgraphs(Subgraphs &&) = default; - Subgraphs &operator=(const Subgraphs &) = default; - Subgraphs &operator=(Subgraphs &&) = default; - ~Subgraphs() = default; - - /** - * @brief Put subgraph in the container with a new Index for that - * - * @param[in] subg Subgraph to be pushed - * @param[in] index Index of subgraph to be pushed - * @return Created - */ - void push(SubgraphIndex index, const std::shared_ptr &subg) { _subgraphs[index] = subg; } - - /** - * @brief Remove the subgraph that is associated with the given index - * - * @param[in] index Index of the subgraph to be removed - * @return N/A - */ - void remove(const SubgraphIndex &index) { _subgraphs.erase(index); } - - /** - * @brief Get the subgraph that is associated with the given index - * - * @param[in] index Index of the subgraph to be returned - * @return Graph - */ - const std::shared_ptr &at(const SubgraphIndex &index) const - { - return _subgraphs.at(index); - } - /** - * @brief Get the subgraph that is associated with the given index - * - * @param[in] index Index of the subgraph to be returned - * @return Graph - */ - std::shared_ptr &at(const SubgraphIndex &index) { return _subgraphs.at(index); } - - /** - * @brief Get the subgraph that is associated with the given index - * - * @param[in] index Index of the subgraph to be returned - * @return true if such entry exists otherwise false - */ - bool exist(const SubgraphIndex &index) const - { - auto it = _subgraphs.find(index); - return it != _subgraphs.end(); - } - - /** - * @brief Iterate over the container with given function - * - * @param[in] fn Function to be run for every container entry - * @return N/A - */ - void iterate(const 
std::function &fn) const - { - for (const auto &e : _subgraphs) - { - fn(e.first, *e.second); - } - } - - /** - * @brief Iterate over the container with given function - * - * @param[in] fn Function to be run for every container entry - * @return N/A - */ - void iterate(const std::function &fn) - { - for (const auto &e : _subgraphs) - { - fn(e.first, *e.second); - } - } - - /** - * @brief Get count of Subgraphs - * - * @return count of Subgraphs - */ - size_t count() const { return _subgraphs.size(); } - - /** - * @brief Return the primary subgraph - * - * @return std::shared_ptr Primary sugraph - */ - std::shared_ptr primary() const { return _subgraphs.at(SubgraphIndex{0}); } - -private: - std::unordered_map> _subgraphs; -}; - -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_SUBGRAPHS_H__ diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h index 0a00da5..3c50627 100644 --- a/runtime/onert/core/include/ir/TypeInfo.h +++ b/runtime/onert/core/include/ir/TypeInfo.h @@ -50,11 +50,7 @@ public: public: DataType type() const { return _type; } - float scale() const - { - assert(_quant.scales.size() == 1); - return _quant.scales[0]; - } + float scale() const { return _quant.scales[0]; } const std::vector &scales() const { return _quant.scales; } int32_t zero_point() const { diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h index 1825f7f..3c20f39 100644 --- a/runtime/onert/core/include/ir/operation/Bulk.h +++ b/runtime/onert/core/include/ir/operation/Bulk.h @@ -32,6 +32,8 @@ public: struct Param { std::string binary_path; + std::vector origin_input_shapes; + std::vector origin_output_shapes; }; public: diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h index db76f9d..4369ca5 100644 --- a/runtime/onert/core/include/util/CalculateActivationRange.h +++ 
b/runtime/onert/core/include/util/CalculateActivationRange.h @@ -17,6 +17,8 @@ #ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ #define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__ +#include + #include "ir/InternalType.h" namespace onert diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 89a9a6a..4bbc02a 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;bcq") // FIXME Remove bcq +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0") diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h index da8bc86..d53b810 100644 --- a/runtime/onert/core/include/util/ConfigSource.h +++ b/runtime/onert/core/include/util/ConfigSource.h @@ -17,17 +17,17 @@ #ifndef __ONERT_UTIL_CONFIG_SOURCE_H__ #define __ONERT_UTIL_CONFIG_SOURCE_H__ -#include - -#include "IConfigSource.h" +#include +#include namespace onert { namespace util { -void config_source(std::unique_ptr &&source); -void config_source_ext(std::unique_ptr &&source); +using CfgKeyValues = std::unordered_map; + +void setConfigKeyValues(const CfgKeyValues &keyValues); bool toBool(const std::string &val); int toInt(const std::string &val); diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h deleted file mode 100644 index 8c5d0e8..0000000 --- a/runtime/onert/core/include/util/EnvConfigSource.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ - -#include - -#include "util/GeneralConfigSource.h" - -namespace onert -{ -namespace util -{ - -class EnvConfigSource final : public GeneralConfigSource -{ -public: - std::string get(const std::string &key) const override; - -private: - std::unordered_map _default_attributes; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h deleted file mode 100644 index dedc820..0000000 --- a/runtime/onert/core/include/util/GeneralConfigSource.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ - -#include - -#include "util/IConfigSource.h" - -namespace onert -{ -namespace util -{ - -class GeneralConfigSource : public IConfigSource -{ -public: - GeneralConfigSource() = default; - - std::string get(const std::string &key) const override; - void set(const std::string &key, const std::string &val); - -private: - std::unordered_map _map; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h deleted file mode 100644 index 07b0984..0000000 --- a/runtime/onert/core/include/util/IConfigSource.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__ -#define __ONERT_UTIL_I_CONFIG_SOURCE_H__ - -#include - -namespace onert -{ -namespace util -{ - -struct IConfigSource -{ - /** - * @brief Destroy the IConfigSource object - */ - virtual ~IConfigSource() = default; - - /** - * @brief get the value for the matching key - * - * @param key string key to search - * @return string value associated with the key - */ - virtual std::string get(const std::string &key) const = 0; -}; - -} // namespace util -} // namespace onert - -#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__ diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h index a493789..36b6c85 100644 --- a/runtime/onert/core/include/util/ObjectManager.h +++ b/runtime/onert/core/include/util/ObjectManager.h @@ -17,14 +17,13 @@ #ifndef __ONERT_UTIL_OBJECT_MANAGER_H__ #define __ONERT_UTIL_OBJECT_MANAGER_H__ -#include -#include -#include -#include +#include "util/logging.h" +#include +#include +#include #include - -#include "util/logging.h" +#include namespace onert { @@ -208,7 +207,7 @@ public: l.push_back(e.first); } - for (auto index : l) + for (auto &index : l) { fn(index, *_objects[index]); } diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h index 334257d..da284d2 100644 --- a/runtime/onert/core/include/util/TracingCtx.h +++ b/runtime/onert/core/include/util/TracingCtx.h @@ -19,7 +19,7 @@ #include "ir/Graph.h" #include "ir/Index.h" -#include "ir/Subgraphs.h" +#include "ir/Model.h" #include #include @@ -37,29 +37,9 @@ class TracingCtx public: /** * @brief Create and store unique session id managed by this class - * Note that this constructor can be called by multiple sessions running in parallely. - * Use this constructor only when there is only one subgraph in a model. + * @note This constructor can be called by multiple session running in parallely. 
*/ - TracingCtx(const ir::Graph *primary_subgraph) - { - decideSessionID(); - _subgraph_indices.emplace(primary_subgraph, 0); - } - - /** - * @brief Create and store unique session id managed by this class - * Note that this constructor can be called by multiple sessions running in parallely. - */ - TracingCtx(const onert::ir::Subgraphs *subgraphs) - { - assert(subgraphs); - - decideSessionID(); - - auto count = subgraphs->count(); - for (size_t i = 0; i < count; i++) - _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i); - } + TracingCtx(void) { decideSessionID(); } uint32_t getSessionId() const { return _session_id; } diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h index e67be98..390dbb5 100644 --- a/runtime/onert/core/src/backend/builtin/ExternalContext.h +++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h @@ -24,6 +24,8 @@ #include #include +#include + namespace onert { namespace backend diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc index 3d6358d..fa2fc0b 100644 --- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc @@ -16,12 +16,10 @@ #include "KernelGenerator.h" -#include -#include #include "kernel/IfLayer.h" -#include "kernel/WhileLayer.h" #include "kernel/PermuteLayer.h" -#include "exec/ExecutorBase.h" +#include "kernel/WhileLayer.h" + #include "exec/FunctionSequence.h" namespace onert @@ -35,12 +33,12 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d const std::shared_ptr &tensor_reg, const std::shared_ptr &external_context) : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager}, - _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{ - external_context} + _tensor_reg{tensor_reg}, 
_tensor_registries{}, _executors{nullptr}, _external_context{ + external_context} { UNUSED_RELEASE(_graph); UNUSED_RELEASE(_tensor_registries); - UNUSED_RELEASE(_executor_map); + UNUSED_RELEASE(_executors); } std::unique_ptr KernelGenerator::generate(ir::OperationIndex ind) @@ -48,20 +46,16 @@ std::unique_ptr KernelGenerator::generate(ir::OperationI assert(_dyn_tensor_manager); assert(_tensor_reg); - auto dyn_shape_inferer = - std::make_unique(_graph.operands(), _tensor_reg); - auto ret = std::make_unique(); // Prepare to handle dynamic tensors later auto dyn_ctx = std::make_shared(); { - dyn_ctx->op_ind = ind; - dyn_ctx->operations = &_graph.operations(); - dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - - ret->dynamic_tensor_ctx(dyn_ctx); + dyn_ctx->op = &_graph.operations().at(ind); + dyn_ctx->dynamic_shape_inferer = + std::make_unique(_graph.operands(), _tensor_reg); } + ret->dynamic_tensor_ctx(dyn_ctx); auto &op = _graph.operations().at(ind); op.accept(*this); @@ -90,12 +84,12 @@ void KernelGenerator::visit(const ir::operation::If &node) output_tensors.emplace_back(output_tensor); } - // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of + // IfLayer just set Executors instead of then and else executor to avoid complexity of // creating executor recusively const auto cond_tensor = input_tensors.front(); input_tensors.erase(input_tensors.begin()); auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>( - cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map, + cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors, _external_context); _return_fn = std::move(fn); @@ -136,10 +130,10 @@ void KernelGenerator::visit(const ir::operation::While &node) output_tensors.emplace_back(output_tensor); } - // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of + // WhileLayer just set Executors instead 
of cond and body executor to avoid complexity of // creating executor recusively auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>( - input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map, + input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context); _return_fn = std::move(fn); diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h index 00ad962..d5931ca 100644 --- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h +++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h @@ -17,13 +17,14 @@ #ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__ -#include "exec/IExecutor.h" +#include "DynamicTensorManager.h" #include "ExternalContext.h" -#include "ir/Graph.h" -#include "TensorBuilder.h" -#include "compiler/TensorRegistries.h" -#include "backend/basic/KernelGeneratorBase.h" #include "TensorRegistry.h" +#include "../../compiler/TensorRegistries.h" + +#include "backend/basic/KernelGeneratorBase.h" +#include "exec/Executors.h" +#include "ir/Graph.h" namespace onert { @@ -43,10 +44,10 @@ public: { _tensor_registries = tensor_registries; } - void setExecutorMap(const std::shared_ptr &executor_map) + void setExecutors(const std::shared_ptr &executors) { // FIXME Using shared_ptr's raw pointer! 
- _executor_map = executor_map.get(); + _executors = executors.get(); } std::unique_ptr generate(ir::OperationIndex ind) override; @@ -64,7 +65,7 @@ private: DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr _tensor_reg; compiler::TensorRegistries _tensor_registries; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; const std::shared_ptr _external_context; }; diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc index fdd9d9d..cdb4196 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc @@ -16,10 +16,6 @@ #include "IfLayer.h" -#include -#include "exec/ExecutorBase.h" -#include "PermuteLayer.h" - namespace onert { namespace backend @@ -33,13 +29,13 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, const std::vector input_tensors, const std::vector output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map, + exec::Executors *executors, const std::shared_ptr &external_context) : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, - _executor_map{executor_map}, _external_context{external_context} + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors}, + _external_context{external_context} { - // At this point, executor_map may not have executors of then subg and else subg + // At this point, executors may not have executors of then subg and else subg } void IfLayer::run() @@ -65,12 +61,12 @@ void IfLayer::run() if (cond_result) { VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; - subg_exec = _executor_map->at(_then_subg_index).get(); + subg_exec = _executors->at(_then_subg_index).get(); } else { VERBOSE(If) << "Call to $" << 
_else_subg_index << " (else)" << std::endl; - subg_exec = _executor_map->at(_else_subg_index).get(); + subg_exec = _executors->at(_else_subg_index).get(); } subg_exec->execute(_input_tensors, _output_tensors); diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h index f12ef36..fa5537a 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h @@ -18,7 +18,7 @@ #define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__ #include -#include +#include #include "../ExternalContext.h" namespace onert @@ -37,8 +37,7 @@ public: const std::vector input_tensors, const std::vector output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, - exec::ExecutorMap *executor_map, - const std::shared_ptr &external_context); + exec::Executors *executors, const std::shared_ptr &external_context); public: void run() override; @@ -49,7 +48,7 @@ private: const std::vector _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; const std::shared_ptr _external_context; }; diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc index 20cd87a..ddaecdf 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc @@ -16,9 +16,9 @@ #include "PermuteLayer.h" -#include "exec/ShapeConverter.h" +#include "../../../exec/ShapeConverter.h" -#include "ruy/context.h" // from @ruy +#include // from @ruy namespace onert { diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h index ac5470e..227e324 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h +++ 
b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h @@ -17,10 +17,10 @@ #ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ #define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__ -#include "exec/IPermuteFunction.h" -#include "exec/IExecutor.h" #include "../ExternalContext.h" -#include "ruy/thread_pool.h" // from @ruy +#include "../../../exec/IPermuteFunction.h" + +#include // from @ruy namespace onert { diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc index 81b4a63..8e006c5 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc @@ -16,11 +16,12 @@ #include "WhileLayer.h" -#include -#include -#include "exec/ExecutorBase.h" -#include #include "PermuteLayer.h" +#include "../../../exec/ExecutorBase.h" + +#include + +#include namespace onert { @@ -34,14 +35,14 @@ namespace kernel WhileLayer::WhileLayer(const std::vector input_tensors, const std::vector output_tensors, const ir::SubgraphIndex &cond_subg_index, - const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map, + const ir::SubgraphIndex &body_subg_index, exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr &external_context) : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map}, + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors}, _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context} { - // At this point, executor_map may not have executors of cond subg and body subg + // At this point, executors may not have executors of cond subg and body subg } void WhileLayer::run() @@ -56,8 +57,8 @@ void WhileLayer::run() // // Run cond subg // If there is no loop copy "_input_tensors" -> "_dst_tensors", else 
copy "cond subg inputs" -> // "_dst_tensors" - auto cond_exec = _executor_map->at(_cond_subg_index).get(); - auto body_exec = _executor_map->at(_body_subg_index).get(); + auto cond_exec = _executors->at(_cond_subg_index).get(); + auto body_exec = _executors->at(_body_subg_index).get(); // Need a temp tensor to hold the cond subgraph output assert(cond_exec->getOutputTensors().size() == 1); diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h index 9121027..8551b3d 100644 --- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h @@ -18,7 +18,7 @@ #define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__ #include -#include +#include #include #include #include @@ -41,7 +41,7 @@ public: WhileLayer(const std::vector input_tensors, const std::vector output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager, + exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr &external_context); public: @@ -52,7 +52,7 @@ private: const ir::SubgraphIndex _body_subg_index; const std::vector _input_tensors; const std::vector _output_tensors; - exec::ExecutorMap *_executor_map; + exec::Executors *_executors; basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors const std::shared_ptr _external_context; }; diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index 0d6051b..44442c0 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -16,16 +16,11 @@ #include "compiler/BackendManager.h" -#include -#include +#include "../backend/builtin/Backend.h" +#include "../backend/builtin/Config.h" -#include "backend/Backend.h" 
-#include "backend/builtin/Backend.h" -#include "backend/builtin/Config.h" -#include "backend/IConfig.h" -#include "util/logging.h" -#include "util/ConfigSource.h" -#include "misc/string_helpers.h" +#include +#include static const char *SHARED_LIB_EXT = #if defined(__APPLE__) && defined(__MACH__) @@ -152,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const return nullptr; } -const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; } +const backend::Backend *BackendManager::getBuiltin() const { return _builtin; } } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 6a1d8fc..7be9c1e 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -18,29 +18,27 @@ #include "ExecutorFactory.h" #include "ShapeValidator.h" +#include "pass/ConstantOutputPass.h" +#include "pass/OddOutputPass.h" +#include "pass/PassRunner.h" +#include "pass/UnusedOperandEliminationPass.h" +#include "../backend/builtin/Config.h" +#include "../dumper/dot/DotDumper.h" +#include "../interp/InterpExecutor.h" +#include "../ir/OperationCloner.h" +#include "../ir/OperationDumper.h" +#include "../ir/verifier/Verifier.h" -#include -#include "compiler/BackendManager.h" -#include "compiler/IScheduler.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" #include "compiler/StaticShapeInferer.h" -#include "compiler/OperationLowerInfo.h" -#include "compiler/pass/ConstantOutputPass.h" -#include "compiler/pass/OddOutputPass.h" -#include "compiler/pass/PassRunner.h" -#include "compiler/pass/UnusedOperandEliminationPass.h" -#include "exec/ExecTime.h" -#include "ir/verifier/Verifier.h" -#include "dumper/dot/DotDumper.h" -#include "compiler/Linear.h" -#include "interp/InterpExecutor.h" #include "util/ConfigSource.h" #include "util/logging.h" -#include "ir/OperationDumper.h" -#include 
"ir/OperationCloner.h" -#include "misc/string_helpers.h" -#include "json/json.h" + +#include +#include +#include + +// TODO Remove using fstream header +#include namespace { @@ -86,8 +84,104 @@ void verboseOptions(compiler::CompilerOptions &options) << std::noboolalpha; } -void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs, - const std::string &str) +std::unordered_map> +createStaticShapeInferers( + const std::unordered_map> + &lowered_subgs) +{ + // Allocate StaticShapeInferer per each subgraph + std::unordered_map> inferers; + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + inferers[subg_index] = std::make_unique(lowered_subg.get()); + } + + // Append observers in all StaticShapeInferers + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + + // TODO: Change this iteration for all to controlflow iteration + lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &, + const ir::Operation &op) { + // A function to append child inferers. These make it possible for a StaticShapeInferer to + // call StaticShapeInferers of child subgraphs recursively + auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) { + auto *child_inferer = inferers.at(child_subg_idx).get(); + inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer); + }; + + // A function to append subg input observers. 
This makes it possible for a StaticShapeInferer + // to update inputs of child subgraphs + auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector child_subg_inputs; + auto &child_subg = lowered_subgs.at(child_subg_idx)->graph(); + for (const auto &input_idx : child_subg.getInputs()) + { + auto operand_ptr = child_subg.operands().getRawPtr(input_idx); + child_subg_inputs.emplace_back(operand_ptr); + } + inferers.at(subg_index) + ->appendSubgInputObserver(child_subg_idx, + std::make_unique(child_subg_inputs)); + }; + + // A function to set controlflow output observers. This makes it possible for a + // StaticShapeInferer to update outputs of parent controlflow operations + auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) { + std::vector cf_outputs; + auto &subg = lowered_subg->graph(); + for (const auto &output_idx : op.getOutputs()) + { + auto operand_ptr = subg.operands().getRawPtr(output_idx); + cf_outputs.emplace_back(operand_ptr); + } + inferers.at(child_subg_idx) + ->setControlflowOutputObserver(std::make_unique(cf_outputs)); + }; + + // Append Observers in a StaticShapeInferer + if (op.opcode() == ir::OpCode::If) + { + const auto &if_op = nnfw::misc::polymorphic_downcast(op); + + appendChildInferer(if_op.param().then_subg_index); + appendChildInferer(if_op.param().else_subg_index); + + appendSubgraphInputObserver(if_op.param().then_subg_index); + appendSubgraphInputObserver(if_op.param().else_subg_index); + + setControlFlowOutputObserver(if_op.param().then_subg_index); + } + else if (op.opcode() == ir::OpCode::While) + { + const auto &while_op = nnfw::misc::polymorphic_downcast(op); + + appendChildInferer(while_op.param().cond_subg_index); + appendChildInferer(while_op.param().body_subg_index); + + appendSubgraphInputObserver(while_op.param().cond_subg_index); + appendSubgraphInputObserver(while_op.param().body_subg_index); + + 
setControlFlowOutputObserver(while_op.param().body_subg_index); + } + }); + } + + return inferers; +} + +} // namespace + +namespace onert +{ + +namespace compiler +{ +void ManualSchedulerOptions::setBackendMap(const std::string &str) { // TODO Support multiple subgraphs for manual scheduling auto key_val_list = nnfw::misc::split(str, ';'); @@ -102,37 +196,24 @@ void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgr const auto &key_str = key_val.at(0); const auto &val = key_val.at(1); auto key = static_cast(std::stoi(key_str)); - - subgs.at(ir::SubgraphIndex{0}) - ->operations() - .at(ir::OperationIndex{key}); // Check if exist, or this wil throw - ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val); + this->index_to_backend.emplace(ir::OperationIndex{key}, val); } } -} // namespace - -namespace onert -{ - -namespace compiler +std::unique_ptr CompilerOptions::fromGlobalConfig() { - -CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) -{ - CompilerOptions options; - options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); - options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); - options.executor = util::getConfigString(util::config::EXECUTOR); - options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); - options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); - options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); - options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); - + auto o = std::make_unique(); + o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); + o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); + o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); + o->executor = 
util::getConfigString(util::config::EXECUTOR); + o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER); + o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE); + o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE); + o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE); { // Backend for all - auto &ms_options = options.manual_scheduler_options; + auto &ms_options = o->manual_scheduler_options; // Default value for op_backend_all is first element in the backend list ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS); @@ -151,54 +232,67 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) // Index to Backend auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP); - setBackendMap(ms_options, subgs, map_str); + ms_options.setBackendMap(map_str); } - return options; + return o; } -Compiler::Compiler(const std::shared_ptr &subgs, util::TracingCtx *tracing_ctx) - : _subgraphs{subgs}, _state{State::CREATED} +Compiler::Compiler(const std::shared_ptr &model, CompilerOptions &copt) + : _nnpkg{std::make_shared(model)}, _state{State::CREATED}, _voptions{&copt} { - // Set default values for CompilerOptions - // All these default values should not be fetched from Env, when we stop supporting Android NN - // API. 
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs); - - _options.tracing_ctx = tracing_ctx; + // DO NOTHING } -void Compiler::enableToFp16() { _options.fp16_enable = true; } +Compiler::Compiler(const std::shared_ptr &nnpkg, + std::vector> &copts) + : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{} +{ + for (uint32_t i = 0; i < copts.size(); i++) + { + _voptions.push_back(copts[i].get()); + } +} -void Compiler::set_backend_from_str(const char *backend_settings) +void Compiler::enableToFp16() { - assert(_subgraphs != nullptr); - // Backend for all - auto &ms_options = _options.manual_scheduler_options; - setBackendMap(ms_options, *_subgraphs, std::string{backend_settings}); + for (auto options : _voptions) + options->fp16_enable = true; } void Compiler::checkProfilerConditions() { - if (!_options.he_scheduler) + if (_nnpkg->model_count() != 1) + throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet"); + + auto &options = *_voptions[0]; + + if (options.he_scheduler) throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling."); - if (_options.executor != "Dataflow") + if (options.executor != "Dataflow") throw std::runtime_error("Profiling mode works only with 'Dataflow' executor"); } bool Compiler::buildPartialGraph(uint32_t num_graphs) { - if (_subgraphs->count() > 1) + // Use 1st model and options only on partial graph (pipeline) compile + assert(_nnpkg->model_count() == 1); + assert(_voptions.size() == 1); + + auto model = _nnpkg->primary_model(); + auto &options = *_voptions[0]; + + if (model->subgraphs_count() > 1) return false; - auto partialgraphs = std::make_shared(); + auto partialgraphs = std::make_shared(); for (uint32_t idx = 0; idx < num_graphs; idx++) { auto partialgraph = std::make_unique(); partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph)); } - _subgraphs->primary()->setPartialgraphs(partialgraphs); + model->primary_subgraph()->setPartialModel(partialgraphs); auto 
partial_graph = primary_subgraph()->partialgraphs(); @@ -208,8 +302,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) for (auto use_operation : use_operations) { - auto graph_index = _options.partial_graph_options.index_to_graph.find(use_operation); - if (graph_index == _options.partial_graph_options.index_to_graph.end()) + auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation); + if (graph_index == options.partial_graph_options.index_to_graph.end()) { throw std::runtime_error("Invalid Partition Map"); } @@ -230,8 +324,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) primary_subgraph()->operations().iterate( [&](const ir::OperationIndex &operation_index, const ir::Operation &operation) { - auto graph_index = _options.partial_graph_options.index_to_graph.find(operation_index); - if (graph_index == _options.partial_graph_options.index_to_graph.end()) + auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index); + if (graph_index == options.partial_graph_options.index_to_graph.end()) { throw std::runtime_error("Invalid Partition Map"); } @@ -259,7 +353,7 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) assert(new_operation_index == operation_index); }); - for (uint32_t idx = 0; idx < partial_graph->count(); idx++) + for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++) { auto partition = partial_graph->at(ir::SubgraphIndex{idx}); @@ -282,10 +376,10 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) auto use_operations = primary_subgraph()->operands().at(operand_index).getUses(); auto iter = use_operations.begin(); ir::SubgraphIndex graph_index = - _options.partial_graph_options.index_to_graph.find(*iter++)->second; + options.partial_graph_options.index_to_graph.find(*iter++)->second; while (iter != use_operations.end()) { - if (graph_index != _options.partial_graph_options.index_to_graph.find(*iter)->second && + if (graph_index != 
options.partial_graph_options.index_to_graph.find(*iter)->second && !partition->getOutputs().contains(operand_index)) { partition->addOutput(operand_index, @@ -344,96 +438,157 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs) return true; } -std::shared_ptr Compiler::compile(void) +std::shared_ptr Compiler::compile(void) { - // Set control flow backend for control flow operators + for (auto options : _voptions) { + // Set control flow backend for control flow operators auto &builtin_id = backend::builtin::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; - } + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; - // FIXME This is a workaround for bcq operations, should remove it - { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + // FIXME This is a workaround for bcq operations, should remove it + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + + // FIXME This is a workaround for bulk operations, should remove it + options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix"; + + verboseOptions(*options); } - verboseOptions(_options); + // NYI: allow one model compilation + auto const model_count = _nnpkg->model_count(); + if (model_count != _voptions.size()) + throw std::runtime_error{"Model count and option vector size mismatch"}; - 
_subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { - // Mandatory passes - pass::PassRunner{} - .append(std::make_unique(subg)) - .append(std::make_unique(subg)) - .run(); + for (uint32_t i = 0; i < model_count; i++) + { + _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + // Mandatory passes + pass::PassRunner{} + .append(std::make_unique(subg)) + .append(std::make_unique(subg)) + .run(); - // Optimizations - pass::PassRunner{}.append(std::make_unique(subg)).run(); - }); + // Optimizations + pass::PassRunner{}.append(std::make_unique(subg)).run(); + }); + } /*************************************************** * Prepare compilation phase ***************************************************/ - auto executors = std::make_shared(); - // Compilable check // TODO: Support hybrid execution - // execution between interpreter and compiled executor (including control flow) - if (_options.disable_compile) + if (_voptions[0]->disable_compile) { - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + if (model_count > 1) + throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"}; + + auto executors = std::make_shared(); + + _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { executors->emplace(index, std::make_unique(subg)); }); _state = State::COMPILED; - return executors; + return std::make_shared(executors, nullptr); } // Mode check - if (_options.he_profiling_mode) + // TODO handle option for each model + if (_voptions[0]->he_profiling_mode) checkProfilerConditions(); /*************************************************** * Backend independent analysis & optimization phase ***************************************************/ - auto dump_level = static_cast(_options.graph_dump_level); + // TODO Handle dump level for each model + auto dump_level = static_cast(_voptions[0]->graph_dump_level); + onert::dumper::dot::DotDumper 
dot_dumper(dump_level); + + // Tracing context + auto tracing_ctx = std::make_unique(); + + // Model edge context + std::unique_ptr model_edges = nullptr; // Lower: Assign backend std::unordered_map> lowered_subgs; - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); - dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); - // Lower: Assign backend - lowered_subgs[index] = std::make_unique(subg, _options); + if (model_count == 1) + { + _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value())); + // Lower: Assign backend + lowered_subgs[index] = std::make_unique(subg, *_voptions[0]); + // Set tracing_ctx for copied graph + tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value()); + }); + } + else + { + // TODO Support tracing_ctx for multiple model + tracing_ctx = nullptr; + + // Copy model edge context + model_edges = std::make_unique(_nnpkg->model_edges()); - subg.setSubgraphs(nullptr); - }); + for (uint32_t i = 0; i < model_count; i++) + { + auto model = _nnpkg->model(ir::ModelIndex{i}); + if (model->subgraphs_count() != 1) + throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"}; + auto subg = model->primary_subgraph(); + dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i)); + + // For multimodel, model index is used for lowered graph index in lowered graph map + // and index type is SubgraphIndex + // TODO Find better way to represent lowered graph index for multimodel's subgraph + lowered_subgs[ir::SubgraphIndex{i}] = + std::make_unique(*model->primary_subgraph(), *_voptions[i]); + } + } - _subgraphs.reset(); + _nnpkg.reset(); for (auto &pair : lowered_subgs) { const auto &subg_index = pair.first; auto &lowered_subg = pair.second; - onert::dumper::dot::DotDumper 
dot_dumper_lowered(lowered_subg.get(), dump_level); - dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value())); } // Shape inference. { - const auto primary_subg_idx = ir::SubgraphIndex{0}; - StaticShapeInferer inferer(primary_subg_idx, lowered_subgs); - auto &lowered_subg = lowered_subgs.at(primary_subg_idx); - auto ordered_ops = lowered_subg->graph().topolSortOperations(); - for (auto op_ind : ordered_ops) + // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called + // recursively + std::unordered_map> inferers = + createStaticShapeInferers(lowered_subgs); + + if (model_count == 1) { - const auto &op = lowered_subg->graph().operations().at(op_ind); - bool has_dynamic_tensor = inferer.infer(op); - lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor); + const auto primary_subg_idx = ir::SubgraphIndex{0}; + inferers.at(primary_subg_idx)->infer(); + + for (const auto &pair : inferers) + { + const auto inferer = pair.second.get(); + inferer->dump(); + } + } + else + { + // Assume multi model has only one subgraph on each model + for (const auto &pair : inferers) + { + const auto inferer = pair.second.get(); + inferer->infer(); + inferer->dump(); + } } - inferer.dump(); } // Shape validation @@ -452,8 +607,7 @@ std::shared_ptr Compiler::compile(void) /************************************************************* * Backend independent analysis & optimization phase finished *************************************************************/ - - executors = std::make_shared(); + auto executors = std::make_shared(std::move(model_edges)); for (auto &pair : lowered_subgs) { const auto &subg_index = pair.first; @@ -464,24 +618,31 @@ std::shared_ptr Compiler::compile(void) std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); 
}); - auto executor = std::unique_ptr{ - ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)}; + + auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0]; + auto executor = std::unique_ptr{ExecutorFactory::get().create( + std::move(lowered_subg), tracing_ctx.get(), options, executors)}; executor->setIndexedRanks(indexed_ranks); - executors->insert(std::make_pair(subg_index, std::move(executor))); + executors->emplace(subg_index, std::move(executor)); } /******************************** * Code generation phase finished ********************************/ _state = State::COMPILED; - return executors; + return std::make_shared(executors, std::move(tracing_ctx)); } -std::vector> Compiler::compile(const char *package_file_path, - const char *map_file_path) +std::vector> Compiler::compile(const char *package_file_path, + const char *map_file_path) { - std::vector> executors; - auto executor_map = std::make_shared(); + // Allow one model compilation for pipeline + if (_nnpkg->model_count() != 1) + throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."}; + assert(_voptions.size() == 1); + + auto model = _nnpkg->primary_model(); + auto &options = *_voptions[0]; std::string package_path(package_file_path); std::string partition_map_file; @@ -508,7 +669,7 @@ std::vector> Compiler::compile(const char *pa num_graphs = np.asUInt(); for (uint32_t i = 0; i < (uint32_t)map.size(); ++i) { - _options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] = + options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] = ir::SubgraphIndex{map[i].asUInt()}; } } @@ -525,25 +686,25 @@ std::vector> Compiler::compile(const char *pa // Set control flow backend for control flow operators { auto &builtin_id = backend::builtin::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = 
builtin_id; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id; } // FIXME This is a workaround for bcq operations, should remove it { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; } - // It doesn't support tracing in case of partial graph + // FIXME This is a workaround for bulk operations, should remove it { - _options.tracing_ctx = nullptr; + options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix"; } - verboseOptions(_options); + verboseOptions(options); - _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { // Mandatory passes auto part = subg.partialgraphs(); part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) { @@ -566,38 +727,41 @@ std::vector> Compiler::compile(const char *pa // Compilable check // TODO: Support hybrid execution - // execution between interpreter and compiled executor (including control flow) - if (_options.disable_compile) + if (options.disable_compile) { - _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - executor_map->emplace(index, std::make_unique(subg)); - executors.push_back(executor_map); + std::vector> results; + auto executors = std::make_shared(); + + model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { + executors->emplace(index, std::make_unique(subg)); }); + 
results.push_back(std::make_shared(executors, nullptr)); _state = State::COMPILED; - return executors; + return results; } // Mode check - if (_options.he_profiling_mode) + if (options.he_profiling_mode) checkProfilerConditions(); /*************************************************** * Backend independent analysis & optimization phase ***************************************************/ - auto dump_level = static_cast(_options.graph_dump_level); + auto dump_level = static_cast(options.graph_dump_level); + onert::dumper::dot::DotDumper dot_dumper_part(dump_level); // Lower: Assign backend std::unordered_map> lowered_partialgraphs; - _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { + model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) { auto part = subg.partialgraphs(); part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) { - onert::dumper::dot::DotDumper dot_dumper_part(partialgraph, dump_level); - dot_dumper_part.dump(nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value())); + dot_dumper_part.dump(partialgraph, + nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value())); // // Lower: Assign backend lowered_partialgraphs[pindex] = - std::make_unique(subg, partialgraph, _options); - partialgraph.setSubgraphs(nullptr); + std::make_unique(subg, partialgraph, options); }); }); @@ -606,25 +770,20 @@ std::vector> Compiler::compile(const char *pa const auto &partialgraph_index = pair.first; auto &lowered_partialgraph = pair.second; - onert::dumper::dot::DotDumper dot_dumper_lowered_part(lowered_partialgraph.get(), dump_level); - dot_dumper_lowered_part.dump("after_lower_subg_partialgraph-" + - std::to_string(partialgraph_index.value())); + dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" + + std::to_string(partialgraph_index.value())); } // Partial Graph shape inference + std::unordered_map> inferers = + createStaticShapeInferers(lowered_partialgraphs); + // NOTE If 
partialgraph has subgraphs StaticShapeInferer may be called multiple times for (auto &pair : lowered_partialgraphs) { const auto &partialgraph_index = pair.first; - auto &lowered_partialgraph = pair.second; - StaticShapeInferer partial_inferer(partialgraph_index, lowered_partialgraphs); - auto ordered_ops = lowered_partialgraph->graph().topolSortOperations(); - for (auto op_ind : ordered_ops) - { - const auto &op = lowered_partialgraph->graph().operations().at(op_ind); - bool has_dynamic_tensor = partial_inferer.infer(op); - lowered_partialgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor); - } - partial_inferer.dump(); + const auto partial_inferer = inferers.at(partialgraph_index).get(); + partial_inferer->infer(); + partial_inferer->dump(); } // Shape validation @@ -652,9 +811,11 @@ std::vector> Compiler::compile(const char *pa ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph))); } + std::vector> results; for (auto &pair : ordered) { - executor_map = std::make_shared(); + auto executors = std::make_shared(); + const auto &partialgraph_index = ir::SubgraphIndex(pair.first); auto &lowered_partialgraph = pair.second; auto indexed_ranks = lowered_partialgraph->indexed_ranks(); @@ -663,19 +824,21 @@ std::vector> Compiler::compile(const char *pa lowered_partialgraph->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); auto executor = std::unique_ptr{ - ExecutorFactory::get().create(std::move(lowered_partialgraph), _options, executor_map)}; + ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)}; executor->setIndexedRanks(indexed_ranks); - executor_map->insert(std::make_pair(ir::SubgraphIndex{0}, std::move(executor))); - executors.push_back(executor_map); + executors->emplace(ir::SubgraphIndex{0}, std::move(executor)); + + // It doesn't support tracing in case of partial graph + results.push_back(std::make_shared(executors, nullptr)); } - 
_subgraphs.reset(); + _nnpkg.reset(); /******************************** * Code generation phase finished ********************************/ _state = State::COMPILED; - return executors; + return results; } } // namespace compiler diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index f9db1ca..024556e 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,23 +16,22 @@ #include "ExecutorFactory.h" -#include "backend/builtin/Config.h" -#include "backend/builtin/KernelGenerator.h" -#include "backend/builtin/TensorBuilder.h" -#include "backend/builtin/UserTensor.h" -#include "backend/IPortableTensor.h" -#include "compiler/BackendManager.h" -#include "compiler/BackendManager.h" -#include "compiler/ExecutionBuilder.h" -#include "compiler/Linear.h" -#include "dumper/text/GraphDumper.h" -#include "exec/DataflowExecutor.h" -#include "exec/ExecTime.h" -#include "exec/ExecutionObservers.h" -#include "exec/LinearExecutor.h" -#include "exec/ParallelExecutor.h" -#include "ir/OperationCloner.h" -#include "util/TracingCtx.h" +#include "Linear.h" +#include "../backend/builtin/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/UserTensor.h" +#include "../dumper/text/GraphDumper.h" +#include "../exec/DataflowExecutor.h" +#include "../exec/ExecTime.h" +#include "../exec/ExecutionObservers.h" +#include "../exec/LinearExecutor.h" +#include "../exec/ParallelExecutor.h" +#include "../ir/OperationCloner.h" + +#include +#include +#include +#include #include #include @@ -242,16 +241,17 @@ ExecutorFactory::ExecutorFactory() { _map["Linear"] = createLinearExecutor; _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, false); + std::placeholders::_3, std::placeholders::_4, false); _map["Parallel"] = std::bind(createDataflowExecutor, 
std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, true); + std::placeholders::_3, std::placeholders::_4, true); } exec::IExecutor *ExecutorFactory::create(std::unique_ptr lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map) + const std::shared_ptr &executors) { - return _map.at(options.executor)(std::move(lowered_graph), options, executor_map); + return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors); } void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, @@ -282,7 +282,7 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap } void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, - const std::shared_ptr &executor_map, + const std::shared_ptr &executors, const backend::BackendContexts &backend_contexts) { for (auto &pair : backend_contexts) @@ -292,7 +292,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs, { auto builtin_kernel_gen = builtin_context->kernel_gen; builtin_kernel_gen->setTensorRegistries(tensor_regs); - builtin_kernel_gen->setExecutorMap(executor_map); + builtin_kernel_gen->setExecutors(executors); } } } @@ -317,12 +317,11 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con return ordered_contexts; } -exec::IExecutor * -ExecutorFactory::createLinearExecutor(std::unique_ptr lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map) +exec::IExecutor *ExecutorFactory::createLinearExecutor( + std::unique_ptr lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr &executors) { - auto graph = lowered_graph->graph(); + auto &graph = lowered_graph->graph(); backend::BackendContexts backend_contexts = createBackendContexts(*lowered_graph, options.executor == "Linear"); @@ 
-346,7 +345,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr lo prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts); ExecutionBuilder builder; @@ -426,14 +425,17 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr lo auto code_map = builder.releaseCodeMap(); - auto exec = new exec::LinearExecutor{ - std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order, - options.tracing_ctx}; + auto exec = new exec::LinearExecutor{std::move(lowered_graph), + std::move(backend_contexts), + tensor_regs, + std::move(code_map), + order, + tracing_ctx}; if (!options.trace_filepath.empty()) { - std::unique_ptr ctp = std::make_unique( - options.trace_filepath, exec->graph(), options.tracing_ctx); + std::unique_ptr ctp = + std::make_unique(options.trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } @@ -441,8 +443,9 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr lo } exec::IExecutor *ExecutorFactory::createDataflowExecutor( - std::unique_ptr lowered_graph, const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map, bool parallel) + std::unique_ptr lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr &executors, + bool parallel) { backend::BackendContexts backend_contexts = createBackendContexts(*lowered_graph, options.executor == "Linear"); @@ -462,7 +465,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( prepareMigrantTensors(*lowered_graph, backend_contexts); // Give some runtime objects to builtin KernelGenerator - prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts); + prepareBuiltinBackend(tensor_regs, executors, backend_contexts); ExecutionBuilder builder; @@ -491,13 +494,13 @@ 
exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (parallel) { exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts), - tensor_regs, std::move(code_map), options.tracing_ctx}; + tensor_regs, std::move(code_map), tracing_ctx}; } else { auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, - std::move(code_map), options.tracing_ctx}; + std::move(code_map), tracing_ctx}; if (options.he_profiling_mode) { std::vector backends; @@ -515,8 +518,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (!options.trace_filepath.empty()) { - std::unique_ptr ctp = std::make_unique( - options.trace_filepath, exec->graph(), options.tracing_ctx); + std::unique_ptr ctp = + std::make_unique(options.trace_filepath, exec->graph(), tracing_ctx); exec->addObserver(std::move(ctp)); } diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index 2ee05fa..70c089f 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -21,7 +21,7 @@ #include "backend/ITensor.h" #include "compiler/LoweredGraph.h" -#include "exec/IExecutor.h" +#include "exec/Executors.h" #include #include @@ -38,8 +38,9 @@ public: public: exec::IExecutor *create(std::unique_ptr lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map); + const std::shared_ptr &executors); private: ExecutorFactory(); @@ -48,25 +49,26 @@ private: static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph, const backend::BackendContexts &backend_contexts); static void prepareBuiltinBackend(const TensorRegistries &tensor_regs, - const std::shared_ptr &executor_map, + const std::shared_ptr &executors, const backend::BackendContexts &backend_contexts); static std::deque> orderBackendContext(const backend::BackendContexts 
&backend_contexts); - static exec::IExecutor * - createLinearExecutor(std::unique_ptr lowered_graph, - const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map); + static exec::IExecutor *createLinearExecutor( + std::unique_ptr lowered_graph, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr &executors); static exec::IExecutor * createDataflowExecutor(std::unique_ptr lowered_graph, + const util::TracingCtx *tracing_ctx, const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map, bool parallel); + const std::shared_ptr &executors, bool parallel); private: - std::unordered_map, - const compiler::CompilerOptions &options, - const std::shared_ptr &executor_map)>> + std::unordered_map< + std::string, + std::function, const util::TracingCtx *tracing_ctx, + const compiler::CompilerOptions &options, const std::shared_ptr &executors)>> _map; }; diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc index 5c1cef1..98dc906 100644 --- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc +++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc @@ -180,7 +180,7 @@ void Fp32ToFp16Converter::appendOpSequences() { _lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); // For now, the only acl_cl supports fully fp16 type @@ -375,7 +375,7 @@ void Fp32ToFp16Converter::convertOperands() { _lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); // For now, the only acl_cl supports fully fp16 if 
(lower_info->backend()->config()->id() != kAclClBackendConfigId) @@ -515,7 +515,7 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OperandIndex &new_op_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto new_lower_info = std::make_unique(); auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout()); @@ -527,7 +527,7 @@ void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_s void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind, const ir::OpSequenceIndex &new_op_seq_ind) { - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto new_lower_info = @@ -635,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex const ir::OperationIndex &node_index) { auto &node = _lowered_graph.graph().operations().at(node_index); - const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind); + const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind); assert(lower_info != nullptr); auto layout = lower_info->layout(); diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc index 2f996c8..c4bfddb 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.cc +++ b/runtime/onert/core/src/compiler/HEScheduler.cc @@ -14,17 +14,14 @@ * limitations under the License. 
*/ -#include "ir/Operand.h" -#include "compiler/HEScheduler.h" -#include "ir/Graph.h" -#include "util/ConfigSource.h" +#include "HEScheduler.h" + #include "compiler/BackendResolver.h" +#include "ir/Graph.h" #include "util/logging.h" -#include "util/Utils.h" -#include "exec/FunctionSequence.h" + #include #include -#include namespace { diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h index 1a95b98..18ea388 100644 --- a/runtime/onert/core/src/compiler/HEScheduler.h +++ b/runtime/onert/core/src/compiler/HEScheduler.h @@ -23,14 +23,16 @@ #ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_ #define __ONERT_COMPILER_H_E_SCHEDULER_H_ -#include "compiler/IScheduler.h" -#include "compiler/BackendManager.h" -#include "compiler/Compiler.h" -#include "ir/Graph.h" -#include "exec/ExecTime.h" -#include "backend/Backend.h" -#include -#include "ir/OperationIndexMap.h" +#include "IScheduler.h" +#include "../backend/builtin/Config.h" +#include "../exec/ExecTime.h" + +#include +#include +#include +#include +#include + #include #include diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc new file mode 100644 index 0000000..c4a2df0 --- /dev/null +++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "HEScheduler.h" +#include "../exec/ExecTime.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +using namespace onert; +using namespace ir; +using namespace backend; +using namespace operation; +using namespace exec; + +// +// Mock backends classes +// + +struct MockConfigCPU : public IConfig +{ + std::string id() override { return "cpu"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +class MockBackendContext : public BackendContext +{ +public: + using BackendContext::BackendContext; + ITensorRegistry *genTensors() override { return nullptr; } + FunctionMap genKernels() override { return {}; } +}; + +struct MockBackendCPU : public Backend +{ + std::shared_ptr config() const override { return std::make_shared(); } + std::unique_ptr newContext(ContextData &&data) const override + { + return std::make_unique(this, std::move(data), nullptr); + } +}; + +struct MockConfigGPU : public IConfig +{ + std::string id() override { return "gpu"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackendGPU : public Backend +{ + std::shared_ptr config() const override { return std::make_shared(); } + std::unique_ptr newContext(ContextData &&data) const override + { + return std::make_unique(this, std::move(data), nullptr); + } +}; + +struct MockConfigNPU : public IConfig +{ + std::string id() override { return "npu"; } + bool initialize() override { return true; }; + bool 
supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackendNPU : public Backend +{ + std::shared_ptr config() const override { return std::make_shared(); } + std::unique_ptr newContext(ContextData &&data) const override + { + return std::make_unique(this, std::move(data), nullptr); + } +}; + +// +// Constants +// + +const int OPERAND_ELEMS = 268203; +const int OPERAND_SIZE = OPERAND_ELEMS * 4; +const int OPERATION_SIZE = OPERAND_SIZE * 3; + +const std::string LINEAR("Linear"); +const std::string DATAFLOW("Dataflow"); +const std::string PARALLEL("Parallel"); + +// +// Helper functions +// + +// Set executor through environment variable +void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); } + +// Set profiling mode through environment variable +void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); } + +// Calculate operation size by addition sizes of all input and output operands +uint32_t calcOpSize(const std::shared_ptr &graph, const OperationIndex &op_idx) +{ + uint32_t size = 0; + const auto &op = graph->operations().at(op_idx); + for (const auto &ind : op.getInputs() + op.getOutputs()) + size += graph->operands().at(ind).info().total_size(); + return size; +} + +// Set execution operation time. This method is needed since ExecutionTime has only +// 'updateOperationExecTime' method. 
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time; + et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set); + assert(et.getOperationExecTime(backend, operation, quant, op_size) == time); +} + +// Set same execution time for all given backends/operations +void setOperationsExecutionTime(const std::vector &backends, + const std::vector &op_names, + const std::vector &op_sizes, int64_t exec_time) +{ + assert(op_names.size() == op_sizes.size()); + ExecTime et(backends); + for (int i = 0; i < op_names.size(); ++i) + { + for (auto &backend : backends) + setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); + } + et.storeOperationsExecTime(); +} + +// Set permute time from one backend to another. This method is needed since ExecutionTime has only +// 'updatePermuteTime' method. +void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend, + bool quant, uint32_t op_size, int64_t time) +{ + // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it + assert(time > 0); + int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size); + int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? 
time : 2 * time - prev_time; + et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set); + assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time); +} + +// Set same permutation time between all given backends +void setPermutationsExecutionTime(const std::vector &backends, + const int operand_size, const int64_t exec_time) +{ + ExecTime et(backends); + for (const auto &backend : backends) + { + for (auto &other_backend : backends) + { + if (backend == other_backend) + continue; + setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); + } + } + et.storeOperationsExecTime(); +} + +// +// Functions for creating graphs +// + +using OIS = OperandIndexSequence; + +template +OperationIndex create(std::shared_ptr graph, Types &&... args) +{ + auto op = std::make_unique(std::forward(args)...); + auto op_idx = graph->addOperation(std::move(op)); + // For now in scheduler test all operations in tested graphs has same size (for simplicity) + assert(calcOpSize(graph, op_idx) == OPERATION_SIZE); + return op_idx; +} + +// Create straight graph: Add->Sub->Mul +std::shared_ptr createStraightGraph() +{ + auto graph = std::make_shared(); + const TypeInfo float_op(DataType::FLOAT32); + + // Create add node + auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE}; + create(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params); + + // Create sub node + auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE}; + create(graph, OIS{add_out_idx, 
sub_const_idx}, OIS{sub_out_idx}, sub_op_params); + + // Create mul node + auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; + create(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params); + + graph->verify(); + return graph; +} + +/* Create branched graph: + * [Add] + * // \\ + * [Mul1] [FC2] + * || || + * [Mul2] [FC2] + * \\ // + * [Sub] + */ +std::shared_ptr createBranchedGraph() +{ + auto graph = std::make_shared(); + const TypeInfo float_op(DataType::FLOAT32); + + // Create add node + auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE}; + create(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params); + + // Create mul1 node + auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; + create(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx}, + mul1_op_params); + + // Create mul2 node + auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; + create(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx}, + mul2_op_params); + + // Create fc1 node + auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto fc1_out_idx = 
graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + FullyConnected::Param fc1_op_params{Activation::NONE}; + create(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params); + + // Create fc2 node + auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + FullyConnected::Param fc2_op_params{Activation::NONE}; + create(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params); + + // Create sub node + auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); + BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE}; + create(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params); + + graph->verify(); + return graph; +} + +// +// Tests setup/teardown +// + +// SetUp/TearDown methods runs before/after each test and performs actions common for each test +class HESchedulerTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Initialize mock backends + _cpu_backend = new MockBackendCPU(); + _gpu_backend = new MockBackendGPU(); + _npu_backend = new MockBackendNPU(); + _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend}; + + // Remove previous profile data if it exists + if (!remove("exec_time.json")) + { + // DO NOTHING (no profile data) + } + + // Remember original value of 'EXECUTOR' environment variable + char *executor = std::getenv("EXECUTOR"); + _original_executor = executor == nullptr ? "" : executor; + + // Remember original value of 'PROFILING_MODE' environment variable + char *profiling_mode = std::getenv("PROFILING_MODE"); + _original_profiling_mode = profiling_mode == nullptr ? 
"" : profiling_mode; + } + + void TearDown() override + { + delete _cpu_backend; + delete _gpu_backend; + delete _npu_backend; + EXPECT_EQ(remove("exec_time.json"), 0); + setenv("EXECUTOR", _original_executor.c_str(), true); + setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true); + } + + const MockBackendCPU *_cpu_backend{nullptr}; + const MockBackendGPU *_gpu_backend{nullptr}; + const MockBackendNPU *_npu_backend{nullptr}; + std::vector _mock_backends; + + std::string _original_executor; + std::string _original_profiling_mode; +}; + +// +// HEScheduler tests +// + +class HESchedulerTestWithExecutorParam : public HESchedulerTest, + public testing::WithParamInterface +{ +}; + +// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - +// one time for each executor +INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam, + testing::Values(LINEAR, DATAFLOW, PARALLEL)); + +// Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) +{ + setExecutor(GetParam()); + + // Prepare graph + ir::Model model; + auto graph(createStraightGraph()); + model.push(ir::SubgraphIndex{0}, graph); + OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: scheduler assigns different backend to each node + { + // For each backend reduce execution time of one node + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time + { + // Increase transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); + ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu"); + } +} + +// Test 
scheduler behavior for branched graph with known execution time of all nodes and permutes +TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) +{ + const int64_t NPU_ET = 5000; + setExecutor(GetParam()); + + // Prepare graph + ir::Model model; + auto graph(createBranchedGraph()); + model.push(ir::SubgraphIndex{0}, graph); + OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), + sub_op_idx(5); + + // Set default execution and transfer time + setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); + setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"}, + {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); + + // Test 1 + // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all + // nodes, in case of parallel executor scheduler assigns different backends to branches. + { + // Reduce execution time + ExecTime et(_mock_backends); + setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + + std::string branch1_expected_backend("npu"), branch2_expected_backend("npu"); + if (GetParam() == PARALLEL) + { + branch1_expected_backend = + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? 
"npu" : "gpu"; + branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu"; + } + + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); + } + + // Test 2 + // Expected behaviour: scheduler assigns single backend to all nodes + { + // Increase execution time for GPU backend + ExecTime et(_mock_backends); + /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt * + * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the + * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter + * branching or scheduler assigns another backend to a node*/ + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); + } +} + +// Test scheduler behavior for branched graph and enabled profiling mode 
+TEST_F(HESchedulerTest, branched_graph_profiling_mode) +{ + const int ET = 1e5; + + // Turn on profiling mode + setProfilingMode(true); + setExecutor(DATAFLOW); + + // Prepare graph + ir::Model model; + auto graph(createBranchedGraph()); + model.push(ir::SubgraphIndex{0}, graph); + OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), + sub_op_idx(5); + + // Test 1 + // Expected behaviour: scheduler assigns backends to nodes with unknown execution time + { + // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); + ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu"); + ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); + } + + // Test 2 + // Expected behaviour: scheduler shuffling backends, so different backends are assigned 
to + // neighbor nodes + { + // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC) + ExecTime et(_mock_backends); + setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); + setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); + setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); + et.storeOperationsExecTime(); + + // Test scheduler + auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig(); + auto scheduler = compiler::HEScheduler(_mock_backends, coptions); + const auto br = scheduler.schedule(*graph); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(mul1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), + br->getBackend(fc1_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(), + br->getBackend(mul2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(), + br->getBackend(fc2_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(), + br->getBackend(sub_op_idx)->config()->id()); + } +} + +// TODO: Add tests with unknown execution and permutation time + +} // unnamed namespace diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 73ba962..f85b8d1 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -14,15 +14,13 @@ * limitations under the License. 
*/ -#include -#include - #include "Linear.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" +#include "../dumper/text/GraphDumper.h" + #include "util/logging.h" -#include "dumper/text/GraphDumper.h" + +#include namespace onert { diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 999bffa..9e84753 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -16,24 +16,23 @@ #include "compiler/LoweredGraph.h" -#include -#include -#include -#include "util/logging.h" -#include "compiler/pass/ConstantInsertionPass.h" -#include "compiler/pass/ConstantLoweringPass.h" -#include "compiler/pass/PassRunner.h" -#include "compiler/pass/PermutationOperationPass.h" -#include "compiler/pass/PermutationInsertionPass.h" -#include "compiler/pass/PermutationEliminationPass.h" -#include "dumper/text/GraphDumper.h" -#include "ir/verifier/Verifier.h" +#include "HEScheduler.h" +#include "ManualScheduler.h" +#include "pass/ConstantInsertionPass.h" +#include "pass/ConstantLoweringPass.h" +#include "pass/PassRunner.h" +#include "pass/PermutationEliminationPass.h" +#include "pass/PermutationInsertionPass.h" +#include "pass/PermutationOperationPass.h" +#include "../dumper/text/GraphDumper.h" +#include "../ir/verifier/Verifier.h" + #include "backend/Backend.h" -#include "backend/IConfig.h" #include "compiler/BackendResolver.h" -#include "compiler/ManualScheduler.h" -#include "compiler/HEScheduler.h" -#include "util/TracingCtx.h" +#include "util/logging.h" + +#include +#include namespace onert { @@ -42,7 +41,7 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { - lowerGraph(graph, options); + lowerGraph(options); } // TODO Design better class and constructor to represent parent_graph @@ -50,18 +49,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph const 
CompilerOptions &options) : _graph{graph}, _parent_graph{parent_graph} { - lowerGraph(graph, options); + lowerGraph(options); } -void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options) +void LoweredGraph::lowerGraph(const CompilerOptions &options) { - // set tracing_ctx for copied graph - if (options.tracing_ctx) - { - auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); - options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); - } - // Build backend contexts auto &backend_manager = BackendManager::get(); // Create contexts for other backends diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index 1c70009..8c64217 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -34,77 +34,72 @@ namespace onert namespace compiler { -ShapeValidator::ShapeValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN} -{ -} +ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {} void ShapeValidator::checkUnaryOp(const ir::Operation &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; // Check if I/O shapes match - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::operator()() { - // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when - // creating Compiler - assert(_graph.subgraphs() == nullptr); - - _current_layout = _graph.layout(); - _graph.operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); 
} void ShapeValidator::visit(const ir::operation::BatchMatMul &node) { + const auto &operands = _graph.operands(); const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS)); const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS)); const auto out_index{node.getOutputs().at(0)}; - if (_ctx.at(out_index).info().isDynamic()) + if (operands.at(out_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4); - OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2); - OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2); + OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2); + OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2); } void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)}; const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); // All requirement as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); if (node.getInputs().size() != 2) { const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; - OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2)); - OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(crops_index).shape().rank() == 2); + OP_REQUIRES(operands.at(crops_index).shape().dim(0) == + (operands.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2); } OP_REQUIRES(input_shape.C == output_shape.C); @@ -112,8 +107,9 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; @@ -125,16 +121,16 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)}; - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1); - 
OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1)); + OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1)); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0); - OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2); // more shape validation will be done inside kernel. @@ -143,8 +139,9 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node) void ShapeValidator::visit(const ir::operation::BCQGather &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; @@ -153,13 +150,14 @@ void ShapeValidator::visit(const ir::operation::BCQGather &node) const auto input_clusters_index{ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; - OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more - OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2); + OP_REQUIRES(operands.at(indices_index).shape().rank() <= + 2); // TODO : support rank up to 4 or more + 
OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2); + OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1); + OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0); - OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0); + OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2); // more shape validation will be done inside kernel. } @@ -171,62 +169,67 @@ void ShapeValidator::visit(const ir::operation::Comparison &) void ShapeValidator::visit(const ir::operation::Softmax &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::InstanceNorm &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape()); - OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape() == 
operands.at(ofm_index).shape()); + OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1); + OP_REQUIRES(operands.at(beta_index).shape().rank() == 1); } void ShapeValidator::visit(const ir::operation::Pool2D &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; - OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); } void ShapeValidator::visit(const ir::operation::Permute &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Reduce &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto input_shape = _ctx.at(input_index).shape(); - const auto output_shape = _ctx.at(output_index).shape(); + const auto input_shape = operands.at(input_index).shape(); + const auto output_shape = operands.at(output_index).shape(); OP_REQUIRES(input_shape.rank() <= 4); OP_REQUIRES(output_shape.rank() <= input_shape.rank()); @@ -266,18 +269,20 @@ void ShapeValidator::visit(const ir::operation::Reduce &node) void ShapeValidator::visit(const ir::operation::Transpose &node) { + const auto &operands = _graph.operands(); 
const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; - const auto &output_shape = _ctx.at(output_index).shape(); - const auto &input_shape = _ctx.at(input_index).shape(); + const auto &output_shape = operands.at(output_index).shape(); + const auto &input_shape = operands.at(input_index).shape(); - OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 || - input_shape.rank() == static_cast(_ctx.at(perm_index).shape().num_elements())); + OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 || + input_shape.rank() == + static_cast(operands.at(perm_index).shape().num_elements())); OP_REQUIRES(input_shape.rank() == output_shape.rank()); } @@ -285,8 +290,9 @@ void ShapeValidator::visit(const ir::operation::RNN &node) { // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn // TODO Support dynamic rnn + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto hidden_state_out_index{ @@ -299,35 +305,36 @@ void ShapeValidator::visit(const ir::operation::RNN &node) const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)}; const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)}; - const auto batch_size = _ctx.at(output_index).shape().dim(0); - const auto num_units = _ctx.at(output_index).shape().dim(1); - - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 && - _ctx.at(hidden_state_out_index).shape().rank() == 2 && - _ctx.at(input_index).shape().rank() == 2 && - _ctx.at(weights_index).shape().rank() == 2 && 
- _ctx.at(recurrent_weights_index).shape().rank() == 2 && - _ctx.at(hidden_state_in_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1); - - OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) && - batch_size == _ctx.at(hidden_state_out_index).shape().dim(0)); - OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1)); - - OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(0) && - num_units == _ctx.at(bias_index).shape().dim(0)); - OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) && - num_units == _ctx.at(recurrent_weights_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_in_index).shape().dim(1) && - num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); + const auto batch_size = operands.at(output_index).shape().dim(0); + const auto num_units = operands.at(output_index).shape().dim(1); + + OP_REQUIRES(operands.at(output_index).shape().rank() == 2 && + operands.at(hidden_state_out_index).shape().rank() == 2 && + operands.at(input_index).shape().rank() == 2 && + operands.at(weights_index).shape().rank() == 2 && + operands.at(recurrent_weights_index).shape().rank() == 2 && + operands.at(hidden_state_in_index).shape().rank() == 2); + OP_REQUIRES(operands.at(bias_index).shape().rank() == 1); + + OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) && + batch_size == operands.at(hidden_state_in_index).shape().dim(0) && + batch_size == operands.at(hidden_state_out_index).shape().dim(0)); + OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1)); + + OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) && + num_units == operands.at(recurrent_weights_index).shape().dim(0) && + num_units == operands.at(bias_index).shape().dim(0)); + OP_REQUIRES(num_units == 
operands.at(output_index).shape().dim(1) && + num_units == operands.at(recurrent_weights_index).shape().dim(1) && + num_units == operands.at(hidden_state_in_index).shape().dim(1) && + num_units == operands.at(hidden_state_out_index).shape().dim(1)); } void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; @@ -335,39 +342,40 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); // All requirement as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1); + OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2); - OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2); - OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2); + OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2); + OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2); OP_REQUIRES(input_shape.C == output_shape.C); } void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - const auto frontend_layout = _current_layout; - const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); - const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); + const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); const auto block_size = node.param().block_size; // All assertions as per NNAPI specification. 
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0)); OP_REQUIRES(input_shape.N == output_shape.N); OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C); @@ -382,29 +390,31 @@ void ShapeValidator::visit(const ir::operation::ElementwiseBinary &) void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - const auto &output_obj = _ctx.at(output_index); - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &values_obj = operands.at(values_index); // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729) { - if (_ctx.at(output_index).info().isDynamic()) + if 
(operands.at(output_index).info().isDynamic()) return; const auto &output_shape = output_obj.shape(); @@ -427,26 +437,28 @@ void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node) void ShapeValidator::visit(const ir::operation::ExpandDims &node) { + const auto &operands = _graph.operands(); const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - if (_ctx.at(axis_index).info().isDynamic()) + if (operands.at(axis_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1); + OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1); } void ShapeValidator::visit(const ir::operation::HashtableLookup &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)}; const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)}; const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - const auto &output_obj = _ctx.at(output_index); - const auto &lookups_obj = _ctx.at(lookups_index); - const auto &keys_obj = _ctx.at(keys_index); - const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = operands.at(output_index); + const auto &lookups_obj = operands.at(lookups_index); + const auto &keys_obj = operands.at(keys_index); + const auto &values_obj = operands.at(values_index); - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto &output_shape = output_obj.shape(); @@ -464,28 +476,30 @@ void ShapeValidator::visit(const ir::operation::HashtableLookup &node) void ShapeValidator::visit(const ir::operation::TransposeConv &node) { // shape check + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + + if 
(operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; // Only 4D tensors are supported - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); - OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank()); + OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank()); - const auto frontend_layout = _current_layout; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout); + const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout); // The kernel has only IHWO layout on frontend // So ker_shape is treated here below // I -> N // H -> H // W -> W // O -> C - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC); + const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC); OP_REQUIRES(ifm_shape.N == ofm_shape.N); OP_REQUIRES(ifm_shape.C == ker_shape.C); @@ -494,16 +508,17 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node) void ShapeValidator::visit(const ir::operation::Gather &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto 
indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - const auto ifm_shape = _ctx.at(ifm_index).shape(); - const auto indices_shape = _ctx.at(indices_index).shape(); - const auto ofm_shape = _ctx.at(ofm_index).shape(); + const auto ifm_shape = operands.at(ifm_index).shape(); + const auto indices_shape = operands.at(indices_index).shape(); + const auto ofm_shape = operands.at(ofm_index).shape(); OP_REQUIRES(ifm_shape.rank() <= 4); OP_REQUIRES(indices_shape.rank() <= 3); @@ -512,21 +527,22 @@ void ShapeValidator::visit(const ir::operation::Gather &node) void ShapeValidator::visit(const ir::operation::DepthToSpace &node) { + const auto &operands = _graph.operands(); int32_t block_size = node.param().block_size; // shape check const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - const auto frontend_layout = _current_layout; - const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); - const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); + const auto frontend_layout = _graph.layout(); + const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout); + const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); { OP_REQUIRES(output_shape.N == input_shape.N); @@ -539,22 +555,23 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node) void ShapeValidator::visit(const ir::operation::Pack &node) { + const auto &operands = _graph.operands(); const auto axis{node.param().axis}; const auto 
output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; // shape check - const auto &output_shape = _ctx.at(output_index).shape(); + const auto &output_shape = operands.at(output_index).shape(); const auto output_rank = static_cast(output_shape.rank()); const auto input1_index{node.getInputs().at(0)}; - const auto input_shape = _ctx.at(input1_index).shape(); + const auto input_shape = operands.at(input1_index).shape(); OP_REQUIRES(axis >= -output_rank && axis < output_rank); for (const auto &index : node.getInputs()) { - OP_REQUIRES(input_shape == _ctx.at(index).shape()); + OP_REQUIRES(input_shape == operands.at(index).shape()); } } @@ -562,8 +579,9 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) { // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn // TODO Support dynamic rnn + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto scratch_buffer_index{ @@ -611,91 +629,96 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); - for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i) + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); + for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i) { - OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i)); + OP_REQUIRES(operands.at(input_index).shape().dim(i) == + operands.at(output_index).shape().dim(i)); } - OP_REQUIRES( - 
(_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) && - (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) && - (!_ctx.exist(input_to_input_weights_index) || - _ctx.at(input_to_input_weights_index).shape().rank() == 2) && - _ctx.at(input_to_forget_weights_index).shape().rank() == 2 && - _ctx.at(input_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(input_to_output_weights_index).shape().rank() == 2 && - (!_ctx.exist(recurrent_to_input_weights_index) || - _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) && - _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 && - _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 && - (!_ctx.exist(projection_weights_index) || - _ctx.at(projection_weights_index).shape().rank() == 2) && - _ctx.at(output_state_in_index).shape().rank() == 2 && - _ctx.at(cell_state_in_index).shape().rank() == 2); - - OP_REQUIRES( - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().rank() == 1) && - (!_ctx.exist(cell_to_forget_weights_index) || - _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) && - (!_ctx.exist(cell_to_output_weights_index) || - _ctx.at(cell_to_output_weights_index).shape().rank() == 1) && - (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) && - _ctx.at(forget_gate_bias_index).shape().rank() == 1 && - _ctx.at(cell_bias_index).shape().rank() == 1 && - _ctx.at(output_gate_bias_index).shape().rank() == 1 && - (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1)); + OP_REQUIRES((operands.at(output_index).shape().rank() == 2 || + operands.at(output_index).shape().rank() == 3) && + (operands.at(input_index).shape().rank() == 2 || + operands.at(input_index).shape().rank() == 3) && + (!operands.exist(input_to_input_weights_index) || + 
operands.at(input_to_input_weights_index).shape().rank() == 2) && + operands.at(input_to_forget_weights_index).shape().rank() == 2 && + operands.at(input_to_cell_weights_index).shape().rank() == 2 && + operands.at(input_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(recurrent_to_input_weights_index) || + operands.at(recurrent_to_input_weights_index).shape().rank() == 2) && + operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 && + operands.at(recurrent_to_output_weights_index).shape().rank() == 2 && + (!operands.exist(projection_weights_index) || + operands.at(projection_weights_index).shape().rank() == 2) && + operands.at(output_state_in_index).shape().rank() == 2 && + operands.at(cell_state_in_index).shape().rank() == 2); + + OP_REQUIRES((!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().rank() == 1) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().rank() == 1) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().rank() == 1) && + operands.at(forget_gate_bias_index).shape().rank() == 1 && + operands.at(cell_bias_index).shape().rank() == 1 && + operands.at(output_gate_bias_index).shape().rank() == 1 && + (!operands.exist(projection_bias_index) || + operands.at(projection_bias_index).shape().rank() == 1)); // CIFG assertion - OP_REQUIRES( - ((!_ctx.exist(input_to_input_weights_index) || - (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) && - (!_ctx.exist(recurrent_to_input_weights_index) || - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) && - 
(!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) && - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) || - ((_ctx.exist(input_to_input_weights_index) && - (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) && - (_ctx.exist(recurrent_to_input_weights_index) && - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) && - (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0))); + OP_REQUIRES(((!operands.exist(input_to_input_weights_index) || + (operands.at(input_to_input_weights_index).shape().dim(0) == 0 && + operands.at(input_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(recurrent_to_input_weights_index) || + (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) && + (!operands.exist(input_gate_bias_index) || + operands.at(input_gate_bias_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) || + ((operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) && + (operands.exist(input_gate_bias_index) && + operands.at(input_gate_bias_index).shape().dim(0) != 0))); // Peephole assertion - OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) || - _ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0) && - (!_ctx.exist(cell_to_output_weights_index) || - 
_ctx.at(cell_to_output_weights_index).shape().dim(0) == 0)) || - ((_ctx.exist(cell_to_forget_weights_index) && - _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0) && - (_ctx.exist(cell_to_output_weights_index) && - _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0))); - - bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) && - (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0); + OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) || + operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) && + (!operands.exist(cell_to_output_weights_index) || + operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) || + ((operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) && + (operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0))); + + bool has_input_to_input_weights = + operands.exist(input_to_input_weights_index) && + (operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0); bool has_recurrent_to_input_weights = - _ctx.exist(recurrent_to_input_weights_index) && - (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0); + operands.exist(recurrent_to_input_weights_index) && + (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0); bool has_input_gate_bias = - _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0; - bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) && - _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0; - bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) && - 
_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) && - _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.exist(projection_weights_index) && - (_ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0); + operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0; + bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) && + operands.at(cell_to_input_weights_index).shape().dim(0) != 0; + bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) && + operands.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) && + operands.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = operands.exist(projection_weights_index) && + (operands.at(projection_weights_index).shape().dim(0) != 0 && + operands.at(projection_weights_index).shape().dim(1) != 0); bool has_projection_bias = - _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0; + operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0; // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). // true: no CIFG @@ -710,46 +733,48 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) // NOTE The projection weights may have data but the projection bias may not. bool has_projection_param = has_projection_weights; - const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major) - ? 
_ctx.at(input_index).shape().dim(1) - : _ctx.at(input_index).shape().dim(0); - OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) && - batch_size == _ctx.at(cell_state_in_index).shape().dim(0)); - - const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1); - OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) && - input_size == _ctx.at(input_to_output_weights_index).shape().dim(1)); - - const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0); - OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) && - num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_bias_index).shape().dim(0) && - num_units == _ctx.at(output_gate_bias_index).shape().dim(0) && - num_units == _ctx.at(cell_state_in_index).shape().dim(1)); + const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major) + ? 
operands.at(input_index).shape().dim(1) + : operands.at(input_index).shape().dim(0); + OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) && + batch_size == operands.at(cell_state_in_index).shape().dim(0)); + + const auto input_size = + operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1); + OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) && + input_size == operands.at(input_to_cell_weights_index).shape().dim(1) && + input_size == operands.at(input_to_output_weights_index).shape().dim(1)); + + const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0); + OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(input_to_output_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) && + num_units == operands.at(forget_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_bias_index).shape().dim(0) && + num_units == operands.at(output_gate_bias_index).shape().dim(0) && + num_units == operands.at(cell_state_in_index).shape().dim(1)); const auto output_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) && - output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) && - output_size == _ctx.at(output_state_in_index).shape().dim(1)); + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); + OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) && + output_size == 
operands.at(recurrent_to_cell_weights_index).shape().dim(1) && + output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) && + output_size == operands.at(output_state_in_index).shape().dim(1)); if (has_cifg_param) { - OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1)); - OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) && - num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) && - ((_ctx.exist(cell_to_input_weights_index) && - num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0)) || - (!_ctx.exist(cell_to_input_weights_index) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) && - num_units == _ctx.at(input_gate_bias_index).shape().dim(0)); - OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1)); + OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1)); + OP_REQUIRES( + num_units == operands.at(input_to_input_weights_index).shape().dim(0) && + num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) && + ((operands.exist(cell_to_input_weights_index) && + num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) || + (!operands.exist(cell_to_input_weights_index) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) && + num_units == operands.at(input_gate_bias_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1)); OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights && has_input_gate_bias); if (has_cell_to_input_weights) @@ -757,64 +782,65 @@ void ShapeValidator::visit(const ir::operation::LSTM &node) // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole. 
OP_REQUIRES(has_peephole_param); } - if (_ctx.exist(scratch_buffer_index)) - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4); + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4); } else { - if (_ctx.exist(scratch_buffer_index)) - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3); + if (operands.exist(scratch_buffer_index)) + OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3); } if (has_peephole_param) { - OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) && - num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) && - (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) || - _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); + OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) && + num_units == operands.at(cell_to_output_weights_index).shape().dim(0) && + (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) || + operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */)); } if (has_projection_param) { - OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1)); - OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1)); + OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0)); if (has_projection_bias) { - OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0)); } } - if (_ctx.exist(scratch_buffer_index)) + if (operands.exist(scratch_buffer_index)) { - OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0)); + 
OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0)); } - if (_ctx.exist(output_state_out_index)) + if (operands.exist(output_state_out_index)) { - OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0)); - OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1)); + OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0)); + OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1)); } - if (_ctx.exist(cell_state_out_index)) + if (operands.exist(cell_state_out_index)) { - OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2); - OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0)); - OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1)); + OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2); + OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0)); + OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1)); } } void ShapeValidator::visit(const ir::operation::L2Normalization &node) { + const auto &operands = _graph.operands(); const auto ofm_index{node.getOutputs().at(0)}; - if (_ctx.at(ofm_index).info().isDynamic()) + if (operands.at(ofm_index).info().isDynamic()) return; const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)}; - auto ifm_shape = _ctx.at(ifm_index).shape(); - auto ofm_shape = _ctx.at(ofm_index).shape(); + auto ifm_shape = operands.at(ifm_index).shape(); + auto ofm_shape = operands.at(ofm_index).shape(); OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank()); @@ -826,14 +852,15 @@ void ShapeValidator::visit(const ir::operation::L2Normalization &node) void ShapeValidator::visit(const ir::operation::Unpack &node) { + const 
auto &operands = _graph.operands(); const auto axis{node.param().axis}; const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)}; - const auto &input_shape = _ctx.at(input_index).shape(); + const auto &input_shape = operands.at(input_index).shape(); const auto input_rank = static_cast(input_shape.rank()); OP_REQUIRES(axis >= -input_rank && axis < input_rank); @@ -841,22 +868,23 @@ void ShapeValidator::visit(const ir::operation::Unpack &node) void ShapeValidator::visit(const ir::operation::Pad &node) { + const auto &operands = _graph.operands(); const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)}; - OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32); + OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32); const auto output_index{node.getInputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)}; - const auto &pad_shape = _ctx.at(pad_index).shape(); - const auto input_rank = static_cast(_ctx.at(input_index).shape().rank()); + const auto &pad_shape = operands.at(pad_index).shape(); + const auto input_rank = static_cast(operands.at(input_index).shape().rank()); OP_REQUIRES(pad_shape.rank() == 2); OP_REQUIRES(pad_shape.dim(0) == input_rank); OP_REQUIRES(pad_shape.dim(1) == 2); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Select &) @@ -866,65 +894,70 @@ void ShapeValidator::visit(const ir::operation::Select &) void ShapeValidator::visit(const ir::operation::StridedSlice &node) { + const auto 
&operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4); + OP_REQUIRES(operands.at(input_index).shape().rank() <= 4); } void ShapeValidator::visit(const ir::operation::Split &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)}; const auto num_splits = node.param().num_splits; - const auto input_rank = _ctx.at(input_index).shape().rank(); - auto axis = *reinterpret_cast(_ctx.at(axis_index).data()->base()); + const auto input_rank = operands.at(input_index).shape().rank(); + auto axis = *reinterpret_cast(operands.at(axis_index).data()->base()); axis = axis < 0 ? 
axis + input_rank : axis; OP_REQUIRES(axis >= 0 && axis < input_rank); - OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0); + OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0); } void ShapeValidator::visit(const ir::operation::Shape &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; UNUSED_RELEASE(input_index); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1); + OP_REQUIRES(operands.at(output_index).shape().rank() == 1); } void ShapeValidator::visit(const ir::operation::ResizeBilinear &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) { return; } - OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4); - OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4); + OP_REQUIRES(operands.at(input_index).shape().rank() == 4); + OP_REQUIRES(operands.at(output_index).shape().rank() == 4); } void ShapeValidator::visit(const ir::operation::Reverse &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape()); + OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape()); } void ShapeValidator::visit(const ir::operation::If &) @@ -940,17 +973,18 @@ void ShapeValidator::visit(const ir::operation::While &) void ShapeValidator::visit(const 
ir::operation::SquaredDifference &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - auto output_shape = _ctx.at(output_index).shape(); - auto lhs_shape = _ctx.at(lhs_index).shape(); - auto rhs_shape = _ctx.at(rhs_index).shape(); + auto output_shape = operands.at(output_index).shape(); + auto lhs_shape = operands.at(lhs_index).shape(); + auto rhs_shape = operands.at(rhs_index).shape(); // Check for output rank OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank())); auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank()); @@ -982,36 +1016,40 @@ void ShapeValidator::visit(const ir::operation::SquaredDifference &node) } void ShapeValidator::visit(const ir::operation::Tile &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; const auto multiple_index{node.getInputs().at(1)}; - OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1); - OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank()); - OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank()); + OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1); + OP_REQUIRES(operands.at(multiple_index).shape().dim(0) == + operands.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank()); } void ShapeValidator::visit(const ir::operation::Range &node) { + const auto &operands = 
_graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)}; const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)}; const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0); - OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0); + OP_REQUIRES(operands.at(start_index).shape().rank() == 0); + OP_REQUIRES(operands.at(limit_index).shape().rank() == 0); + OP_REQUIRES(operands.at(delta_index).shape().rank() == 0); } void ShapeValidator::visit(const ir::operation::MatrixBandPart &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)}; const auto num_lower_index{ @@ -1020,23 +1058,24 @@ void ShapeValidator::visit(const ir::operation::MatrixBandPart &node) node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)}; // Check for dimension constraints - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; - OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix - OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar - OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar + OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix + OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar + OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar } void 
ShapeValidator::visit(const ir::operation::LogSoftmax &node) { + const auto &operands = _graph.operands(); const auto output_index{node.getOutputs().at(0)}; - if (_ctx.at(output_index).info().isDynamic()) + if (operands.at(output_index).info().isDynamic()) return; const auto input_index{node.getInputs().at(0)}; - OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank()); + OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank()); } } // namespace compiler diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index 763cf7c..a51e8ad 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -39,8 +39,13 @@ class ShapeValidator : public ir::OperationVisitor public: ShapeValidator(void) = delete; ShapeValidator(const ir::Graph &graph); + ShapeValidator(const ShapeValidator &) = delete; + ShapeValidator(ShapeValidator &&) = delete; + ~ShapeValidator() = default; public: + ShapeValidator &operator=(const ShapeValidator &) = delete; + ShapeValidator &operator=(ShapeValidator &&) = delete; void operator()(); public: @@ -90,10 +95,7 @@ private: void checkUnaryOp(const ir::Operation &node); private: - // TODO Remove _ctx field const ir::Graph &_graph; - const ir::Operands &_ctx; - ir::Layout _current_layout; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index f2fee2c..4854505 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -19,62 +19,90 @@ #include "util/logging.h" #include +#include namespace onert { namespace compiler { - -void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind) +void OperandObserver::updateShapes(const std::vector &changed_operands_info, + bool unpredictable) { - 
StaticShapeInferer inferer(subg_ind, _lowered_subgs); - auto &lgraph = _lowered_subgs.at(subg_ind); - for (auto op_ind : lgraph->graph().topolSortOperations()) + assert(changed_operands_info.size() == _operands.size()); + for (size_t i = 0; i < changed_operands_info.size(); ++i) { - auto &op = lgraph->graph().operations().at(op_ind); - bool has_dynamic_tensor = inferer.infer(op); - lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor); + const auto &changed_operand_info = changed_operands_info.at(i); + auto &operand = _operands.at(i); + // assert(changed_operand_info.typeInfo() == operand->typeInfo()); + // assert(changed_operand_info.typeInfo() == operand->typeInfo()); + // This error check may by replaced by an assertion if this function is called after the + // validation of models are completed. + if (changed_operand_info.typeInfo() != operand->typeInfo()) + { + throw std::runtime_error("OperandObserver: The types of operands are mismatched"); + } + if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable)) + { + operand->info().setDynamic(); + } + else + { + const auto &new_shape = changed_operands_info.at(i).shape(); + operand->info().shape(new_shape); + } } } -bool StaticShapeInferer::infer(const ir::Operation &op) +void StaticShapeInferer::infer() { - bool has_dynamic_tensor = false; - - auto opcode = op.opcode(); - - _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit() - - // IF: need shape inference for then, else - // While: need shape inference for condition, body - if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) - { - op.accept(*this); - } - else + for (const auto &op_idx : _lowered_subg->graph().topolSortOperations()) { - _return_has_dynamic_tensor = checkDynamicInput(op); - - if (_return_has_dynamic_tensor) + const auto &op = _lowered_subg->graph().operations().at(op_idx); + bool has_dynamic_tensor = false; + const auto opcode = op.opcode(); + // IF: requires 
shape inference for then, else + // While: requires shape inference for condition, body + if (opcode == ir::OpCode::If || opcode == ir::OpCode::While) { - setDynamicOutput(op); + op.accept(*this); } else { - op.accept(*this); + has_dynamic_tensor = checkDynamicInput(op); + if (has_dynamic_tensor) + { + setDynamicOutput(op); + } + else + { + op.accept(*this); + } } + has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op); + _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor); } - has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor; - - return has_dynamic_tensor; + if (_controlflow_output_observer != nullptr) + { + // re-sizing output shapes of the controflow operation branching to this subgraph + std::vector outputs_info; + const auto &graph = _lowered_subg->graph(); + const auto &outputs = graph.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + const auto &operand_info = graph.operands().at(outputs.at(i)).info(); + outputs_info.emplace_back(operand_info); + } + _controlflow_output_observer->updateShapes(outputs_info); + } } bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) { + const auto &operands = _lowered_subg->graph().operands(); for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED) { - if (_operands.at(input_idx).info().isDynamic()) + if (operands.at(input_idx).info().isDynamic()) { return true; } @@ -83,11 +111,25 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op) return false; } +bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op) +{ + auto &operands = _lowered_subg->graph().operands(); + for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED) + { + if (operands.at(output_idx).info().isDynamic()) + { + return true; + } + } + return false; +} + void StaticShapeInferer::setDynamicOutput(const ir::Operation &op) { + auto &operands = _lowered_subg->graph().operands(); for (auto output_idx : op.getOutputs() | 
ir::Remove::UNDEFINED) { - _operands.at(output_idx).info().setDynamic(); + operands.at(output_idx).info().setDynamic(); } } @@ -95,11 +137,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx, const ir::OperandIndex rhs_idx) { - const auto &lhs = _operands.at(lhs_idx); - const auto &rhs = _operands.at(rhs_idx); + auto &operands = _lowered_subg->graph().operands(); + const auto &lhs = operands.at(lhs_idx); + const auto &rhs = operands.at(rhs_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape()); @@ -109,11 +152,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op, void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx) { - const auto &input = _operands.at(input_idx); + auto &operands = _lowered_subg->graph().operands(); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = input.info().shape(); @@ -136,36 +180,31 @@ void StaticShapeInferer::dump() return sstream.str(); }; - for (const auto &pair : _lowered_subgs) - { - const auto index = pair.first; - const auto &lowered_subg = pair.second; - VERBOSE(StaticShapeInferer) << index << std::endl; - lowered_subg->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &operand) { - VERBOSE(StaticShapeInferer) - << " " << ind << ", " << (operand.info().isDynamic() ? 
"Dynamic" : "Static") << ", " - << get_shape_str(operand.info().shape()) << std::endl; - }); - } + _lowered_subg->graph().operands().iterate( + [&](const ir::OperandIndex &ind, const ir::Operand &operand) { + VERBOSE(StaticShapeInferer) << " " << ind << ", " + << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", " + << get_shape_str(operand.info().shape()) << std::endl; + }); } void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!axis.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -181,27 +220,31 @@ void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS); const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS); const auto output_index = op.getOutputs().at(0); - const auto &lhs = _operands.at(lhs_index); - const auto &rhs = _operands.at(rhs_index); - auto &output = _operands.at(output_index); + const auto &lhs = operands.at(lhs_index); + const auto &rhs = operands.at(rhs_index); + auto &output = operands.at(output_index); auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param()); output.info().shape(new_shape); } void StaticShapeInferer::visit(const 
ir::operation::BCQFullyConnected &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto cluster_idx{ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)}; - const auto &cluster = _operands.at(cluster_idx); + const auto &cluster = operands.at(cluster_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); auto cluster_buf = reinterpret_cast(cluster.data()->base()); assert(cluster_buf); @@ -214,17 +257,19 @@ void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op) void StaticShapeInferer::visit(const ir::operation::BCQGather &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)}; - const auto &indices = _operands.at(indices_idx); + const auto &indices = operands.at(indices_idx); const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)}; - const auto &input_binary = _operands.at(input_binary_idx); + const auto &input_binary = operands.at(input_binary_idx); const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)}; - const auto &cluster = _operands.at(cluster_idx); + const auto &cluster = operands.at(cluster_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); auto cluster_buf = reinterpret_cast(cluster.data()->base()); assert(cluster_buf); @@ -247,16 +292,16 @@ void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op) void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op) { // get mutable output operand + auto &operands = 
_lowered_subg->graph().operands(); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); if (!shape.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -276,16 +321,18 @@ void StaticShapeInferer::visit(const ir::operation::Comparison &op) void StaticShapeInferer::visit(const ir::operation::Concat &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_count = op.getInputs().size(); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); shape_inference::Shapes input_shapes; for (uint32_t i = 0; i < input_count; i++) { const auto input_idx{op.getInputs().at(i)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); input_shapes.emplace_back(input.shape()); } @@ -297,12 +344,14 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op) void StaticShapeInferer::visit(const ir::operation::Conv2D &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)}; - const auto &ker = _operands.at(ker_idx); + const auto &ker = operands.at(ker_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = @@ -328,17 +377,18 @@ void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op) void StaticShapeInferer::visit(const 
ir::operation::ExpandDims &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!axis.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -360,15 +410,16 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) void StaticShapeInferer::visit(const ir::operation::Fill &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!shape.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -390,15 +441,17 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op) void StaticShapeInferer::visit(const ir::operation::FullyConnected &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)}; - const auto &ker = _operands.at(ker_idx); + const auto &ker = operands.at(ker_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + 
ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape()); @@ -412,15 +465,17 @@ void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op) void StaticShapeInferer::visit(const ir::operation::Gather &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)}; - const auto &indices = _operands.at(indices_idx); + const auto &indices = operands.at(indices_idx); const auto rank = input.info().shape().rank(); const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -434,70 +489,21 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op) void StaticShapeInferer::visit(const ir::operation::If &op) { - auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph(); - auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph(); + // re-sizing input shapes of then/else subgraph const std::vector inputs{op.getInputs().begin() + 1, op.getInputs().end()}; - const auto &outputs = op.getOutputs(); - // re-sizing input shapes of then subgraph - const auto &then_inputs = then_graph.getInputs(); - assert(inputs.size() == then_inputs.size()); + std::vector inputs_info; + const auto &graph = _lowered_subg->graph(); for (size_t i = 0; i < inputs.size(); ++i) { - auto &then_input = then_graph.operands().at(then_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - then_input.info().setDynamic(); - } - else - { - auto new_shape = _operands.at(inputs.at(i)).info().shape(); - then_input.info().shape(new_shape); - } + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); } + _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().then_subg_index)->infer(); - // re-sizing input shapes of else subgraph - const auto &else_inputs = else_graph.getInputs(); - assert(inputs.size() == else_inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) - { - auto &else_input = else_graph.operands().at(else_inputs.at(i)); - if (_operands.at(inputs.at(i)).info().isDynamic()) - { - else_input.info().setDynamic(); - } - else - { - const auto &new_shape = _operands.at(inputs.at(i)).info().shape(); - else_input.info().shape(new_shape); - } - } - - inferSubgraph(op.param().then_subg_index); - inferSubgraph(op.param().else_subg_index); - - // re-sizing output shapes - // TODO use then_graph / else_graph instead - const auto &then_outputs = 
_lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs(); - const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs(); - assert(outputs.size() == then_outputs.size()); - assert(outputs.size() == else_outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) - { - const auto &then_output = then_graph.operands().at(then_outputs.at(i)); - const auto &else_output = else_graph.operands().at(else_outputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (!then_output.info().isDynamic() && !else_output.info().isDynamic() && - then_output.shape() == else_output.shape()) - { - output.info().shape(then_output.shape()); - } - else - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - } + _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info); + _child_inferers.at(op.param().else_subg_index)->infer(); } void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) @@ -507,8 +513,10 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op) void StaticShapeInferer::visit(const ir::operation::LSTM &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - auto &output = _operands.at(output_index); + auto &output = operands.at(output_index); const auto output_state_out_index{ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; @@ -518,24 +526,24 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op) const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; if (output.info().isDynamic() || - (_operands.exist(output_state_out_index) && - _operands.at(output_state_out_index).info().isDynamic()) || - (_operands.exist(cell_state_out_index) && - _operands.at(cell_state_out_index).info().isDynamic()) || - (_operands.exist(scratch_buffer_index) && - 
_operands.at(scratch_buffer_index).info().isDynamic())) + (operands.exist(output_state_out_index) && + operands.at(output_state_out_index).info().isDynamic()) || + (operands.exist(cell_state_out_index) && + operands.at(cell_state_out_index).info().isDynamic()) || + (operands.exist(scratch_buffer_index) && + operands.at(scratch_buffer_index).info().isDynamic())) return; const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto input_to_output_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto &input_to_output_weights = _operands.at(input_to_output_weights_index); + const auto &input_to_output_weights = operands.at(input_to_output_weights_index); const auto recurrent_to_output_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index); + const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index); // re-sizing outputs const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? 
input.shape().dim(1) @@ -555,21 +563,21 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op) output.info().shape(ir::Shape{n_batch, n_output}); } - if (_operands.exist(output_state_out_index)) + if (operands.exist(output_state_out_index)) { - auto &output_state_out = _operands.at(output_state_out_index); + auto &output_state_out = operands.at(output_state_out_index); output_state_out.info().shape(ir::Shape{n_batch, n_output}); } - if (_operands.exist(cell_state_out_index)) + if (operands.exist(cell_state_out_index)) { - auto &cell_state_out = _operands.at(cell_state_out_index); + auto &cell_state_out = operands.at(cell_state_out_index); cell_state_out.info().shape(ir::Shape{n_batch, n_cell}); } - if (_operands.exist(scratch_buffer_index)) + if (operands.exist(scratch_buffer_index)) { - auto &scratch_buffer = _operands.at(scratch_buffer_index); + auto &scratch_buffer = operands.at(scratch_buffer_index); const auto input_to_input_weights_index{ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; @@ -577,11 +585,11 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op) op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; bool has_input_to_input_weights = - _operands.at(input_to_input_weights_index).shape().dim(0) != 0 && - _operands.at(input_to_input_weights_index).shape().dim(1) != 0; + operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0; bool has_recurrent_to_input_weights = - _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG). 
// true: no CIFG @@ -605,20 +613,21 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op) void StaticShapeInferer::visit(const ir::operation::OneHot &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)}; - const auto &indice = _operands.at(indice_idx); + const auto &indice = operands.at(indice_idx); const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)}; - const auto &depth = _operands.at(depth_idx); + const auto &depth = operands.at(depth_idx); const auto axis = op.param().axis; auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!depth.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -631,12 +640,14 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op) void StaticShapeInferer::visit(const ir::operation::Pack &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); const auto rank = input.shape().rank() + 1; const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -651,21 +662,22 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op) void StaticShapeInferer::visit(const ir::operation::Pad &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)}; - const auto &pad = _operands.at(pad_idx); + const auto &pad = operands.at(pad_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // if pad is not constant, output also becomes dynamic if (!pad.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -678,10 +690,12 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op) void StaticShapeInferer::visit(const ir::operation::Permute &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape // Permute is a special operation that layouts of input/output may be different on backend @@ -700,16 +714,18 @@ void StaticShapeInferer::visit(const ir::operation::Pow &op) void StaticShapeInferer::visit(const ir::operation::Range &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)}; const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)}; const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)}; - const auto &start_op = _operands.at(start_idx); - const 
auto &limit_op = _operands.at(limit_idx); - const auto &delta_op = _operands.at(delta_idx); + const auto &start_op = operands.at(start_idx); + const auto &limit_op = operands.at(limit_idx); + const auto &delta_op = operands.at(delta_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); ir::Shape new_shape; if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) @@ -731,21 +747,22 @@ void StaticShapeInferer::visit(const ir::operation::Range &op) else { output.info().setDynamic(); - _return_has_dynamic_tensor = true; } } void StaticShapeInferer::visit(const ir::operation::Reduce &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)}; - const auto &axes = _operands.at(axes_idx); + const auto &axes = operands.at(axes_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); std::vector axes_vec; for (size_t i = 0; i < axes.shape().num_elements(); ++i) @@ -777,19 +794,21 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op) void StaticShapeInferer::visit(const ir::operation::Reshape &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // New shape is given by second input tensor if 
(op.getInputs().size() == 2) { // Let's check the second input const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - const auto &shape = _operands.at(shape_idx); + const auto &shape = operands.at(shape_idx); if (shape.isConstant()) { @@ -810,7 +829,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) { // if shape is NOT Const, set output shape to be dynamic_ output.info().setDynamic(); - _return_has_dynamic_tensor = true; } } // New shape is given by option @@ -835,21 +853,22 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); int32_t height_out, width_out; if (op.getInputs().size() == 2) { - auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE)); + auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE)); if (!size.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } const auto size_v = size.asVector(); @@ -881,17 +900,19 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op) void StaticShapeInferer::visit(const ir::operation::Select &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; - const auto &input_cond = _operands.at(input_cond_idx); + const auto &input_cond = operands.at(input_cond_idx); const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; - const auto &input_true = 
_operands.at(input_true_idx); + const auto &input_true = operands.at(input_true_idx); const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - const auto &input_false = _operands.at(input_false_idx); + const auto &input_false = operands.at(input_false_idx); auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // Select output shpae ir::Shape new_shape = shape_inference::inferSelectShape( @@ -901,12 +922,14 @@ void StaticShapeInferer::visit(const ir::operation::Select &op) void StaticShapeInferer::visit(const ir::operation::Shape &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); // get mutable output operand const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // re-sizing output shape ir::Shape output_shape; @@ -917,20 +940,21 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) void StaticShapeInferer::visit(const ir::operation::Slice &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)}; - const auto &begins = _operands.at(begins_index); + const auto &begins = operands.at(begins_index); const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)}; - const auto &sizes = _operands.at(sizes_index); + const auto &sizes = operands.at(sizes_index); const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); + ir::Operand &output = operands.at(output_index); // Whether input is 
constant or not does not affect whether output is dynamic or not if (!(begins.isConstant() && sizes.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -959,21 +983,22 @@ void StaticShapeInferer::visit(const ir::operation::Softmax &op) void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto output_index = op.getOutputs().at(0); const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)}; const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - ir::Operand &output = _operands.at(output_index); - const auto &input = _operands.at(input_idx); - const auto &block_shape = _operands.at(block_shape_idx); - const auto &padding = _operands.at(padding_idx); + ir::Operand &output = operands.at(output_index); + const auto &input = operands.at(input_idx); + const auto &block_shape = operands.at(block_shape_idx); + const auto &padding = operands.at(padding_idx); // Whether input is constant or not does not affect whether output is dynamic or not if (!(block_shape.isConstant() && padding.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -992,21 +1017,22 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) void StaticShapeInferer::visit(const ir::operation::Split &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)}; - const auto &axis = _operands.at(axis_idx); + const auto &axis = operands.at(axis_idx); auto outputs = op.getOutputs(); if (!axis.isConstant()) { for (auto output_idx : 
outputs) { - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().setDynamic(); } - _return_has_dynamic_tensor = true; return; } @@ -1022,7 +1048,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits); for (auto output_idx : outputs) { - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().shape(new_shape); } } @@ -1035,11 +1061,13 @@ void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op) void StaticShapeInferer::visit(const ir::operation::Squeeze &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); // Squeeze output shpae ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param()); @@ -1048,21 +1076,22 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op) void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)}; - const auto &input = _operands.at(input_index); + const auto &input = operands.at(input_index); const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)}; - const auto &starts = _operands.at(starts_index); + const auto &starts = operands.at(starts_index); const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; - const auto &ends = _operands.at(ends_index); + const auto &ends = operands.at(ends_index); const auto 
strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - const auto &strides = _operands.at(strides_index); + const auto &strides = operands.at(strides_index); const auto output_index = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_index); + ir::Operand &output = operands.at(output_index); if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1085,19 +1114,20 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) void StaticShapeInferer::visit(const ir::operation::Tile &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)}; - const auto &multiplier = _operands.at(multiplier_idx); + const auto &multiplier = operands.at(multiplier_idx); const auto output_idx = op.getOutputs().at(0); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); if (!multiplier.isConstant()) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1112,11 +1142,13 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op) void StaticShapeInferer::visit(const ir::operation::Transpose &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)}; - const auto &perm = _operands.at(perm_idx); + const auto &perm = operands.at(perm_idx); // perm.shape() != ir::Shape{0} means that perm is (n-1...0) // TODO This condition changes to perm.num_elements() == 0 
@@ -1124,11 +1156,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) // get mutable output operand const auto output_idx = op.getOutputs().at(0); - auto &output = _operands.at(output_idx); + auto &output = operands.at(output_idx); if (!perm.isConstant() && !is_regular_transpose) { output.info().setDynamic(); - _return_has_dynamic_tensor = true; return; } @@ -1157,8 +1188,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) void StaticShapeInferer::visit(const ir::operation::Unpack &op) { + auto &operands = _lowered_subg->graph().operands(); + const auto input_idx{op.getInputs().at(0)}; - const auto &input = _operands.at(input_idx); + const auto &input = operands.at(input_idx); const auto num = op.param().num; const auto rank = input.shape().rank(); const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); @@ -1169,10 +1202,9 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) { const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().setDynamic(); } - _return_has_dynamic_tensor = true; return; } @@ -1182,69 +1214,43 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) { const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); + ir::Operand &output = operands.at(output_idx); output.info().shape(new_shape); } } void StaticShapeInferer::visit(const ir::operation::While &op) { - auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph(); - auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph(); + auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get(); + auto cond_input_observer = 
_subg_input_observers.at(op.param().cond_subg_index).get(); + // re-sizing input shapes of body subgraph const auto inputs = op.getInputs(); - const auto &outputs = op.getOutputs(); - - // re-sizing input shapes of then subgraph - const auto &cond_inputs = cond_graph.getInputs(); - assert(inputs.size() == cond_inputs.size()); + std::vector inputs_info; + const auto &graph = _lowered_subg->graph(); for (size_t i = 0; i < inputs.size(); ++i) { - const auto &input = _operands.at(inputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if (input.info().isDynamic()) - { - cond_input.info().setDynamic(); - } - else - { - auto new_shape = input.info().shape(); - cond_input.info().shape(new_shape); - } + const auto &operand_info = graph.operands().at(inputs.at(i)).info(); + inputs_info.emplace_back(operand_info); } - // re-sizing input shapes of body subgraph - const auto &body_inputs = body_graph.getInputs(); - assert(cond_inputs.size() == body_inputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &body_input = body_graph.operands().at(body_inputs.at(i)); - if (cond_input.info().isDynamic()) - { - body_input.info().setDynamic(); - } - else - { - const auto &new_shape = cond_input.info().shape(); - body_input.info().shape(new_shape); - } - } - - // re-sizing operands of body subgraph - inferSubgraph(op.param().body_subg_index); + body_input_observer->updateShapes(inputs_info); + _child_inferers.at(op.param().body_subg_index)->infer(); // Check whether while operation's shapes are predictable - // If any of shape of body outputs and cond inputs are different, non-constant operands would be - // set to dynamic + // This while op's outputs are also updated in the above function + // "_child_inferers.at(op.param().body_subg_index)->infer()". That means that body's outputs and + // this op's outputs must have the same shape.
So we can predict whether body subgraphs will + // change at every step by comparing the shapes of inputs/outputs. If any shape of the body outputs + // and inputs differs, non-constant operands will be set to dynamic. bool check_unpredictable_dynamic = false; - const auto &body_outputs = body_graph.getOutputs(); - assert(body_outputs.size() == cond_inputs.size()); - for (size_t i = 0; i < body_outputs.size(); ++i) + const auto &updated_outputs = op.getOutputs(); + assert(inputs_info.size() == updated_outputs.size()); + for (size_t i = 0; i < updated_outputs.size(); ++i) { - const auto &body_output = body_graph.operands().at(body_outputs.at(i)); - auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) || - (cond_input.shape() != body_output.shape())) + const auto &input_info = inputs_info.at(i); + const auto &output_info = graph.operands().at(updated_outputs.at(i)).info(); + if (input_info.isDynamic() != output_info.isDynamic() || + input_info.shape() != output_info.shape()) { check_unpredictable_dynamic = true; break; @@ -1253,53 +1259,11 @@ void StaticShapeInferer::visit(const ir::operation::While &op) if (check_unpredictable_dynamic) { - // Set inputs of body subgraph - for (const auto &input_index : body_inputs) - { - auto &input = body_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set inputs of cond subgraph - for (const auto &input_index : cond_inputs) - { - auto &input = cond_graph.operands().at(input_index); - if (!input.isConstant()) - { - input.info().setDynamic(); - } - } - - // Set non-constant operands of body subgraph to dynamic - inferSubgraph(op.param().body_subg_index); - } - - // re-sizing operands of cond subgraph - // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to - // dynamic - inferSubgraph(op.param().cond_subg_index); - - // re-sizing outputs of
while operation - // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic - assert(cond_inputs.size() == outputs.size()); - for (size_t i = 0; i < cond_inputs.size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i)); - auto &output = _operands.at(outputs.at(i)); - if (cond_input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - } - else - { - const auto new_shape = cond_input.info().shape(); - output.info().shape(new_shape); - } + body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().body_subg_index)->infer(); } + cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic); + _child_inferers.at(op.param().cond_subg_index)->infer(); } void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) @@ -1307,24 +1271,52 @@ void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) // TODO: NMS supports very limited input/output size. 
ir::operation::DetectionPostProcess::Param param = op.param(); + auto &operands = _lowered_subg->graph().operands(); const int num_detected_boxes = param.max_detections * param.max_classes_per_detection; const auto output_idx1 = op.getOutputs().at(0); - auto &output1 = _operands.at(output_idx1); + auto &output1 = operands.at(output_idx1); output1.info().shape({1, num_detected_boxes, 4}); const auto output_idx2 = op.getOutputs().at(1); - auto &output2 = _operands.at(output_idx2); + auto &output2 = operands.at(output_idx2); output2.info().shape({1, num_detected_boxes}); const auto output_idx3 = op.getOutputs().at(2); - auto &output3 = _operands.at(output_idx3); + auto &output3 = operands.at(output_idx3); output3.info().shape({1, num_detected_boxes}); const auto output_idx4 = op.getOutputs().at(3); - auto &output4 = _operands.at(output_idx4); + auto &output4 = operands.at(output_idx4); output4.info().shape({1}); } +void StaticShapeInferer::visit(const ir::operation::Bulk &op) +{ + auto &operands = _lowered_subg->graph().operands(); + + // TODO: support multiple inputs/outputs + const auto input_idx{op.getInputs().at(0)}; + const auto &input = operands.at(input_idx); + const auto output_idx = op.getOutputs().at(0); + ir::Operand &output = operands.at(output_idx); + + auto cur_input_shape = input.info().shape(); + auto origin_input_shape = op.param().origin_input_shapes[0]; + auto cur_output_shape = output.info().shape(); + auto origin_output_shape = op.param().origin_output_shapes[0]; + + // TODO: more check for valid batch request + assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0)); + assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0); + size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0); + + ir::Shape new_shape; + new_shape.append(origin_output_shape.dim(0) * batch_multiplier); + for (int32_t d = 1; d < origin_output_shape.rank(); ++d) + new_shape.append(origin_output_shape.dim(d)); + + 
output.info().shape(new_shape); +} } // namespace compiler diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h index 2a99db7..b3cc0bb 100644 --- a/runtime/onert/core/src/compiler/TensorRegistries.h +++ b/runtime/onert/core/src/compiler/TensorRegistries.h @@ -17,13 +17,14 @@ #ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__ #define __ONERT_COMPILER_TENSOR_REGISTRIES_H__ -#include -#include -#include "backend/BackendContext.h" +#include "../backend/builtin/Config.h" +#include "../backend/builtin/TensorRegistry.h" + #include "backend/Backend.h" -#include "backend/builtin/Config.h" -#include "backend/builtin/TensorBuilder.h" -#include "backend/builtin/TensorRegistry.h" +#include "backend/BackendContext.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index 181f388..c27ce3d 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -15,7 +15,6 @@ */ #include "PermutationEliminationPass.h" -#include "backend/builtin/Config.h" #include "util/logging.h" diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index 6f98991..71efa1b 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -17,18 +17,16 @@ #include "PermutationInsertionPass.h" -#include -#include -#include +#include "../../backend/builtin/Config.h" -#include "backend/builtin/Config.h" -#include "ir/Operand.h" #include "compiler/OperationLowerInfo.h" -#include "ir/Graph.h" -#include "backend/IConfig.h" +#include "ir/operation/Permute.h" #include "util/logging.h" + +#include #include -#include "ir/operation/Permute.h" +#include 
+#include namespace onert { @@ -125,6 +123,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde // backend auto &model_outputs = _graph.getOutputs(); const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin(); + assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID); + if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend) { model_outputs.replace(operand_index, out_operand_index); @@ -141,6 +141,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde const auto permute_node_layout = ir::Layout::UNKNOWN; // NOTE If one backend supports several layout, the backend must support Permute operation const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin(); + assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID); + if (input_backend == output_backend) { permute_node_backend = input_backend; diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc new file mode 100644 index 0000000..572b4df --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "UnusedOperandEliminationPass.h" + +#include "ir/Graph.h" + +#include + +using namespace onert::ir; +using namespace onert::compiler::pass; + +TEST(UnusedOperandEliminationPass, Simple) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + auto unused = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + + UnusedOperandEliminationPass{graph}.run(); + + ASSERT_TRUE(graph.operands().exist(in)); + ASSERT_TRUE(graph.operands().exist(out)); + ASSERT_FALSE(graph.operands().exist(unused)); +} diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc index 714fb6f..0bb2fa1 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.cc +++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc @@ -19,6 +19,7 @@ #include "DotDumper.h" #include "DotBuilder.h" +#include "ir/OperandIndexMap.h" #include "ir/OperationIndexMap.h" #include "backend/Backend.h" #include "backend/IConfig.h" @@ -31,97 +32,72 @@ namespace dumper namespace dot { -void DotDumper::dump(const std::string &tag) +namespace { - if (_level == Level::OFF) - { - return; - } - - onert::dumper::dot::DotBuilder dot_builder; - - auto &operations = _graph.operations(); - auto &operands = _graph.operands(); - - ir::OperationIndexMap> operation_nodes; - std::unordered_map> operand_nodes; - - auto backend_to_fillcolor = [](const backend::Backend *backend) { - static const auto map = []() { - std::unordered_map ret; - uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( - for (const auto backend : compiler::BackendManager::get().getAll()) - { - ret.emplace(backend, Node::BG_COLORS[index]); - index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); - } - return ret; - }(); - - auto itr = map.find(backend); - if (itr == map.end()) - 
{ - return Node::DEFAULT_FILLCOLOR; - } - else +std::string backend_to_fillcolor(const backend::Backend *backend) +{ + static const auto map = []() { + std::unordered_map ret; + uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :( + for (const auto backend : compiler::BackendManager::get().getAll()) { - return itr->second; + ret.emplace(backend, Node::BG_COLORS[index]); + index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0])); } - }; + return ret; + }(); + auto itr = map.find(backend); + if (itr == map.end()) + { + return Node::DEFAULT_FILLCOLOR; + } + else + { + return itr->second; + } +} - util::Set shown_operand_set; +std::unordered_map> +generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level) +{ + std::unordered_map> dot_operands; + const auto &operands = graph.operands(); operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) { - bool showing_cond = false; - if (_level == Level::ALL) - { - showing_cond = true; - } - else - { - showing_cond = - !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index); - } + bool showing_cond = + level == DotDumper::Level::ALL + ? 
true + : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index); if (showing_cond) { - shown_operand_set.add(index); - auto type = [&]() { using onert::dumper::dot::Operand; - if (_graph.getInputs().contains(index)) + if (graph.getInputs().contains(index)) return Operand::Type::MODEL_INPUT; - if (_graph.getOutputs().contains(index)) + if (graph.getOutputs().contains(index)) return Operand::Type::MODEL_OUTPUT; return Operand::Type::INTERNAL; }(); auto node = std::make_unique(index, type); + std::string label = std::to_string(index.value()); + std::string fillcolor = ""; + node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); - { - // Display LowerInfo attributes - std::string label = std::to_string(index.value()); - std::string fillcolor = ""; - if (_lowered_graph) - { - auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index); - const auto &def_factors = lower_info->def_factors(); - if (def_factors.size() > 0) - { - label += "\\n["; - label += def_factors.getOnlyElement().backend()->config()->id(); - label += "]"; - - fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); - } - } - node->setAttribute("label", label); - node->setAttribute("fillcolor", fillcolor); - } - - operand_nodes.emplace(index, std::move(node)); + dot_operands.emplace(index, std::move(node)); } }); + return dot_operands; +} + +ir::OperationIndexMap> +generate_dot_operations(const ir::Graph &graph, + const ir::OperandIndexMap> &dot_operands) +{ + ir::OperationIndexMap> dot_operations; + const auto &operations = graph.operations(); operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) { auto node = std::make_unique(index, op); @@ -130,42 +106,79 @@ void DotDumper::dump(const std::string &tag) using onert::dumper::dot::Operand; // Constant input and dump level is ALL_BUT_CONSTANTS - if (operand_nodes.find(input) == operand_nodes.end()) + if (dot_operands.find(input) == 
dot_operands.end()) continue; - auto &input_node = operand_nodes.at(input); + auto &input_node = dot_operands.at(input); input_node->addOutEdge(node.get()); } for (auto output : op.getOutputs() | ir::Remove::UNDEFINED) { using onert::dumper::dot::Operand; - auto &output_node = operand_nodes.at(output); + auto &output_node = dot_operands.at(output); node->addOutEdge(output_node.get()); } - operation_nodes.emplace(index, std::move(node)); + dot_operations.emplace(index, std::move(node)); }); - if (_lowered_graph) - { - _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) { - const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index); - if (lower_info) + return dot_operations; +} + +void update_lower_info(const compiler::LoweredGraph &lowered_graph, + ir::OperandIndexMap> *dot_operands) +{ + const auto &operands = lowered_graph.graph().operands(); + operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) { + auto itr = dot_operands->find(index); + if (itr != dot_operands->end()) + { + auto &node = itr->second; + // Display LowerInfo attributes + std::string label = node->getAttribute("label"); + std::string fillcolor = node->getAttribute("fillcolor"); + auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index); + const auto &def_factors = lower_info->def_factors(); + if (def_factors.size() > 0) { - auto fillcolor = backend_to_fillcolor(lower_info->backend()); - std::string backend_label = "[" + lower_info->backend()->config()->id() + "]"; - auto itr = operation_nodes.find(index); - if (itr != operation_nodes.end()) - { - auto &node = itr->second; - node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label); - node->setAttribute("fillcolor", fillcolor); - } + label += "\\n["; + label += def_factors.getOnlyElement().backend()->config()->id(); + label += "]"; + fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend()); } - }); - } + 
node->setAttribute("label", label); + node->setAttribute("fillcolor", fillcolor); + } + }); +} +void update_lower_info(const compiler::LoweredGraph &lowered_graph, + ir::OperationIndexMap> *dot_operations) +{ + const auto &operations = lowered_graph.graph().operations(); + operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) { + const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index); + if (lower_info) + { + auto fillcolor = backend_to_fillcolor(lower_info->backend()); + std::string backend_label = "[" + lower_info->backend()->config()->id() + "]"; + auto itr = dot_operations->find(index); + if (itr != dot_operations->end()) + { + auto &node = itr->second; + node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label); + node->setAttribute("fillcolor", fillcolor); + } + } + }); +} + +void dump_to_file(const ir::OperandIndexMap> &operand_nodes, + const ir::OperationIndexMap> &operation_nodes, + const std::string &tag) +{ + onert::dumper::dot::DotBuilder dot_builder; for (const auto &e : operation_nodes) dot_builder.update(*e.second); for (const auto &e : operand_nodes) @@ -186,6 +199,33 @@ void DotDumper::dump(const std::string &tag) fb.close(); } } +} // namespace + +void DotDumper::dump(const ir::Graph &graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + const auto dot_operands = generate_dot_operands(graph, _level); + const auto dot_operations = generate_dot_operations(graph, dot_operands); + dump_to_file(dot_operands, dot_operations, tag); +} + +void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag) +{ + if (_level == Level::OFF) + { + return; + } + + auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level); + auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands); + update_lower_info(lowered_graph, &dot_operands); + update_lower_info(lowered_graph, &dot_operations); + 
dump_to_file(dot_operands, dot_operations, tag); +} } // namespace dot } // namespace dumper diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index f300c34..6249010 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -38,27 +38,28 @@ public: }; public: - DotDumper(const ir::Graph &graph, Level level) - : _lowered_graph{nullptr}, _graph(graph), _level{level} - { - } - DotDumper(const compiler::LoweredGraph *lowered_graph, Level level) - : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level} - { - } + DotDumper(Level level) : _level{level} {} public: /** - * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set + * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set + * + * @param[in] graph The graph that would be used to get operations and operands + * @param[in] tag The name of dot file that would be created + * @return N/A + */ + void dump(const ir::Graph &graph, const std::string &tag); + + /** + * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set * + * @param[in] graph The graph that would be used to get operations and operands * @param[in] tag The name of dot file that would be created * @return N/A */ - void dump(const std::string &tag); + void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag); private: - const compiler::LoweredGraph *_lowered_graph; - const ir::Graph &_graph; Level _level; }; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index bcac19d..1649be7 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -17,19 +17,18 @@ #ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ #define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__ -#include -#include -#include - -#include "exec/FunctionSequence.h" +#include 
"ExecutorBase.h" #include "Job.h" -#include "ir/OperandIndexSequence.h" -#include "ir/Index.h" -#include -#include "exec/ExecutorBase.h" + #include "compiler/CodeMap.h" +#include "ir/OperandIndexSequence.h" #include "util/TracingCtx.h" +#include +#include +#include +#include + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc index 6bf2744..4b82655 100644 --- a/runtime/onert/core/src/exec/ExecTime.cc +++ b/runtime/onert/core/src/exec/ExecTime.cc @@ -14,12 +14,10 @@ * limitations under the License. */ -#include "exec/ExecTime.h" +#include "ExecTime.h" -#include -#include -#include #include +#include namespace onert { diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc new file mode 100644 index 0000000..1f7152e --- /dev/null +++ b/runtime/onert/core/src/exec/ExecTime.test.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ExecTime.h" + +#include "backend/IConfig.h" +#include "backend/Backend.h" + +#include + +#include + +namespace +{ +using namespace onert; +using namespace exec; +using namespace backend; + +struct MockConfig : public IConfig +{ + std::string id() override { return "b1"; } + bool initialize() override { return true; }; + bool supportPermutation() override { return false; } + ir::Layout supportLayout(const ir::Operation &, ir::Layout) override + { + return ir::Layout::UNKNOWN; + } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } +}; + +struct MockBackend : public ::onert::backend::Backend +{ + std::shared_ptr config() const override + { + return std::make_shared(); + } + std::unique_ptr newContext(ContextData &&) const override + { + return nullptr; + } +}; + +TEST(ExecTime, roundtrip_ok) +{ + const auto *b = new MockBackend(); + std::vector bs = {b}; + { + ExecTime et(bs); + et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.updateOperationExecTime(b, "op1", false, 100, 888); + et.storeOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 150); + ASSERT_EQ(time, 150); + time = et.getOperationExecTime(b, "op1", false, 100); + ASSERT_EQ(time, 888); + et.storeOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} + +TEST(ExecTime, structure) +{ + + const auto *b = new MockBackend(); + std::vector bs = {b}; + { + ExecTime et(bs); + et.updateOperationExecTime(b, "op1", true, 100, 100); + et.updateOperationExecTime(b, "op1", true, 200, 200); + et.storeOperationsExecTime(); + } + { + ExecTime et(bs); + auto time = et.getOperationExecTime(b, "op1", true, 100); + ASSERT_EQ(time, 100); + // Check interpolation + time = et.getOperationExecTime(b, "op1", true, 
200); + ASSERT_EQ(time, 200); + et.storeOperationsExecTime(); + } + // clean up + EXPECT_EQ(remove("exec_time.json"), 0); +} +} // unnamed namespace diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 8eff73b..9d1e06d 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -23,13 +23,12 @@ namespace onert namespace exec { -Execution::Execution(const std::shared_ptr &executors) : _executors{executors} +Execution::Execution(const std::shared_ptr &executors) : _executors{executors} { assert(executors != nullptr); assert(executors->at(ir::SubgraphIndex{0}) != nullptr); - const auto &primary_subg = primary_subgraph(); - _io_desc.inputs.resize(primary_subg.getInputs().size()); - _io_desc.outputs.resize(primary_subg.getOutputs().size()); + _io_desc.inputs.resize(_executors->inputSize()); + _io_desc.outputs.resize(_executors->outputSize()); sem_init(&_async_io_descs_sem, 0, 1); } @@ -48,8 +47,7 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length, ir::Layout layout) { - const auto input_index = primary_subgraph().getInputs().at(index); - const auto info = primary_subgraph().operands().at(input_index).info(); + const auto info = _executors->inputInfo(index); // TODO handle when (!buffer && length != 0) : setting the input as an optional tensor @@ -105,8 +103,7 @@ bool Execution::isEmptyQueue() void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length, ir::Layout layout) { - const auto input_index = primary_subgraph().getInputs().at(index); - const auto info = primary_subgraph().operands().at(input_index).info(); + const auto info = _executors->inputInfo(index); IODescription *_async_io_desc = _async_io_descs.back().first; { @@ -135,8 +132,7 @@ void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, void 
Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) { - const auto output_index = primary_subgraph().getOutputs().at(index); - const auto info = primary_subgraph().operands().at(output_index).info(); + const auto info = _executors->outputInfo(index); IODescription *_async_io_desc = _async_io_descs.front().first; if (length < info.total_size()) @@ -165,8 +161,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con // TODO Remove default parameter void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout) { - const auto output_index = primary_subgraph().getOutputs().at(index); - const auto info = primary_subgraph().operands().at(output_index).info(); + const auto info = _executors->outputInfo(index); if (length < info.total_size()) { @@ -208,7 +203,7 @@ void Execution::execute() { VERBOSE(Execution) << "Start execution" << std::endl; - primary_executor()->execute(_io_desc); + _executors->execute(_io_desc); finished = true; VERBOSE(Execution) << "Execution finished" << std::endl; @@ -248,8 +243,7 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const auto itr = _io_desc.dynamic_input_shapes.find(ind); if (itr == _io_desc.dynamic_input_shapes.end()) { - auto operand_idx = primary_subgraph().getInputs().at(ind); - return primary_subgraph().operands().at(operand_idx).shape(); + return _executors->inputInfo(ind).shape(); } else { diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc new file mode 100644 index 0000000..e3ea494 --- /dev/null +++ b/runtime/onert/core/src/exec/Execution.test.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/Execution.h" + +#include "compiler/Compiler.h" +#include "ir/Graph.h" +#include "ir/operation/BinaryArithmetic.h" +#include "util/TracingCtx.h" + +#include +#include + +namespace +{ + +using namespace onert::ir; + +class CompiledMockUpModel +{ +public: + CompiledMockUpModel() + { + // Model: two elementwise add operation + // model input: lhs, rhs1 + // model output: second add result (result2) + // constant: rhs2 + // result1 <= (lhs + rhs) + // result2 <= (result1 + rhs2) + // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + graph = std::make_shared(); + // 1st add operands (result1 <= lhs + rhs1) + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + static float rhs2_data[4] = {3, 1, -1, 5}; + auto operand_lhs = graph->addOperand(shape, type); + auto operand_rhs1 = graph->addOperand(shape, type); + auto operand_result1 = graph->addOperand(shape, type); + auto operand_rhs2 = graph->addOperand(shape, type); + auto operand_result2 = graph->addOperand(shape, type); + graph->operands() + .at(operand_rhs2) + .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); + // 2nd add operations (result2 <= result1 + rhs2) + operation::BinaryArithmetic::Param param1; + param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + graph->addOperation( + std::make_unique(input_set1, output_set1, param1)); + 
operation::BinaryArithmetic::Param param2; + param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + graph->addOperation( + std::make_unique(input_set2, output_set2, param2)); + // Identify model inputs and outputs + graph->addInput(operand_lhs); + graph->addInput(operand_rhs1); + graph->addOutput(operand_result2); + graph->verify(); + + // Compile + auto model = std::make_shared(); + model->push(onert::ir::SubgraphIndex{0}, graph); + coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + artifact = compiler.compile(); + } + +public: + std::shared_ptr graph; + std::unique_ptr coptions; + std::shared_ptr artifact; +}; + +TEST(ExecInstance, simple) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + onert::exec::Execution execution{executors}; + + execution.setInput(input1, reinterpret_cast(input1_buffer), 16); + execution.setInput(input2, reinterpret_cast(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast(output_buffer), 16); + execution.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +TEST(ExecInstance, twoCompile) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors1 = mockup.artifact->_executors; + onert::exec::Execution execution1{executors1}; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float 
exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + + execution1.setInput(input1, reinterpret_cast(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast(exe1_input2_buffer), 16); + execution1.setOutput(output, reinterpret_cast(exe1_output_buffer), 16); + + // Make new executor: compile again + auto model = std::make_shared(); + model->push(onert::ir::SubgraphIndex{0}, graph); + auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig(); + onert::compiler::Compiler compiler{model, *coptions}; + std::shared_ptr artifact = compiler.compile(); + onert::exec::Execution execution2{artifact->_executors}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + execution2.setInput(input1, reinterpret_cast(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast(exe2_input2_buffer), 16); + execution2.setOutput(output, reinterpret_cast(exe2_output_buffer), 16); + + execution1.execute(); + execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support two initialized execution instance then ordered execution +TEST(ExecInstance, twoExecution) +{ + auto mockup = CompiledMockUpModel(); + auto executors = mockup.artifact->_executors; + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + onert::exec::Execution execution1{executors}; + 
execution1.setInput(input1, reinterpret_cast(exe1_input1_buffer), 16); + execution1.setInput(input2, reinterpret_cast(exe1_input2_buffer), 16); + execution1.setOutput(output1, reinterpret_cast(exe1_output_buffer), 16); + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + + // Make new execution + onert::exec::Execution execution2{executors}; + execution2.setInput(input1, reinterpret_cast(exe2_input1_buffer), 16); + execution2.setInput(input2, reinterpret_cast(exe2_input2_buffer), 16); + execution2.setOutput(output1, reinterpret_cast(exe2_output_buffer), 16); + + execution1.execute(); + execution2.execute(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +class Inference +{ +public: + Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], + std::shared_ptr &executors) + : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} + { + // DO NOTHING + } + + void inference(void) + { + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output1 = IOIndex{0}; + + onert::exec::Execution execution{_executors}; + execution.setInput(input1, reinterpret_cast(_input1), 16); + execution.setInput(input2, reinterpret_cast(_input2), 16); + execution.setOutput(output1, reinterpret_cast(_output), 16); + + execution.execute(); + } + +private: + const float (&_input1)[4]; + const float (&_input2)[4]; + float (&_output)[4]; + std::shared_ptr &_executors; +}; + +// Support multi-thread execution +TEST(ExecInstance, twoThreads) +{ + auto mockup = CompiledMockUpModel(); + auto executors = mockup.artifact->_executors; + + const float exe1_input1_buffer[4] = {1, 0, -1, -2}; + const float exe1_input2_buffer[4] = {1, -3, 2, -4}; + float exe1_output_buffer[4] = {}; + const float exe1_output_expected[4] = {5, -2, 0, -1}; + + 
Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors}; + + const float exe2_input1_buffer[4] = {2, 1, -2, 0}; + const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; + float exe2_output_buffer[4] = {}; + const float exe2_output_expected[4] = {2, 5, -2, 7}; + + Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors}; + + std::thread t1{&Inference::inference, &execution1}; + std::thread t2{&Inference::inference, &execution2}; + + t1.join(); + t2.join(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); + EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); + } +} + +// Support asynchronous execution +TEST(ExecInstance, async) +{ + auto mockup = CompiledMockUpModel(); + auto graph = mockup.graph; + auto executors = mockup.artifact->_executors; + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto output = IOIndex{0}; + + const float input1_buffer[4] = {1, 0, -1, -2}; + const float input2_buffer[4] = {1, -3, 2, -4}; + float output_buffer[4] = {}; + const float output_expected[4] = {5, -2, 0, -1}; + + onert::exec::Execution execution{executors}; + + execution.setInput(input1, reinterpret_cast(input1_buffer), 16); + execution.setInput(input2, reinterpret_cast(input2_buffer), 16); + execution.setOutput(output, reinterpret_cast(output_buffer), 16); + execution.startExecute(); + execution.waitFinish(); + + for (auto i = 0; i < 4; i++) + { + EXPECT_EQ(output_buffer[i], output_expected[i]); + } +} + +} // namespace diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 423b502..3ee1754 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -17,11 +17,12 @@ #ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__ #define __ONERT_EXEC_EXECUTION_OBSERVEE_H__ -#include +#include "ExecutionObservers.h" -#include 
"exec/ExecutionObservers.h" #include "ir/Index.h" +#include + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc index 386178a..9abde7b 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "exec/ExecutionObservers.h" +#include "ExecutionObservers.h" -#include -#include +#include "../util/EventWriter.h" #include "util/logging.h" -#include "exec/IExecutor.h" -#include "misc/polymorphic_downcast.h" -#include "ir/Operation.h" -#include "util/EventWriter.h" + +#include + +#include +#include namespace { diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index 4c6c7b1..1aadac2 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -17,17 +17,16 @@ #ifndef __ONERT_EXEC_OBSREVERS_H__ #define __ONERT_EXEC_OBSREVERS_H__ -#include "exec/IFunction.h" +#include "ExecTime.h" +#include "../util/EventCollector.h" +#include "../util/EventRecorder.h" +#include "../util/EventWriter.h" + +#include "exec/Executors.h" #include "ir/Index.h" #include "ir/Operation.h" -#include "ExecTime.h" #include "util/ITimer.h" -#include "exec/IExecutor.h" -#include "util/EventCollector.h" -#include "util/EventRecorder.h" -#include "util/EventWriter.h" #include "util/TracingCtx.h" -#include "util/EventWriter.h" namespace onert { diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index efc22cf..d2d204a 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -15,11 +15,10 @@ */ #include "ExecutorBase.h" + #include "ShapeConverter.h" -#include "backend/builtin/UserTensor.h" -#include "util/logging.h" -#include "misc/polymorphic_downcast.h" +#include 
namespace onert { diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index c0f609d..e4f9145 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,22 +17,17 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include "IPermuteFunction.h" +#include "ExecutionObservee.h" +#include "../backend/builtin/IOTensor.h" +#include "../compiler/TensorRegistries.h" + +#include "compiler/LoweredGraph.h" #include "exec/IExecutor.h" -#include "exec/ExecTime.h" -#include "exec/ExecutionObservee.h" -#include "exec/IFunction.h" #include "exec/IODescription.h" #include "ir/Graph.h" -#include "ir/Index.h" -#include "compiler/GraphLowerInfo.h" #include "ir/OperationIndexMap.h" -#include "compiler/LoweredGraph.h" -#include "compiler/TensorRegistries.h" -#include "backend/builtin/IOTensor.h" #include "util/TracingCtx.h" -#include #include #include #include diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc new file mode 100644 index 0000000..e0ee24f --- /dev/null +++ b/runtime/onert/core/src/exec/Executors.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "exec/Executors.h" + +namespace onert +{ +namespace exec +{ + +uint32_t Executors::inputSize() const +{ + return _model_edges ? 
_model_edges->pkg_inputs.size() + : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size(); +} + +uint32_t Executors::outputSize() const +{ + return _model_edges ? _model_edges->pkg_outputs.size() + : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size(); +} + +const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index) +{ + if (_model_edges) + { + // Assume that each model may have only one subgraph + // TODO handle general case + const auto desc = _model_edges->pkg_inputs[index.value()]; + const auto model_idx = std::get<0>(desc); + const auto executor_idx = ir::SubgraphIndex{model_idx.value()}; + const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc)); + return _executors.at(executor_idx)->graph().operands().at(input_index).info(); + } + + const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index); + return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info(); +} + +const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index) +{ + if (_model_edges) + { + // Assume that each model may have only one subgraph + // TODO handle general case + auto desc = _model_edges->pkg_outputs[index.value()]; + auto model_idx = std::get<0>(desc); + auto executor_idx = ir::SubgraphIndex{model_idx.value()}; + auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc)); + return _executors.at(executor_idx)->graph().operands().at(output_index).info(); + } + + auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index); + return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info(); +} + +void Executors::execute(const IODescription &desc) +{ + if (_model_edges) + return executeEntries(desc); + + _executors.at(ir::SubgraphIndex{0})->execute(desc); +} + +void Executors::executeEntries(const IODescription &desc) +{ + // Assume 2 executors only + // Assume that each 
model may have only one subgraph + // TODO Support general case + if (_executors.size() != 2) + throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"}; + + // Assume all edges are 0:0:x -> 1:0:x + for (auto edge : _model_edges->edges) + { + if ((std::get(edge.from) != ir::ModelIndex{0}) || + (std::get(edge.to) != ir::ModelIndex{1}) || + (std::get(edge.from) != ir::SubgraphIndex{0}) || + (std::get(edge.to) != ir::SubgraphIndex{0}) || + (std::get(edge.from) != std::get(edge.to))) + throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"}; + } + + // Assume all package inputs are 0:0:x + for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++) + { + auto input = _model_edges->pkg_inputs[i]; + if ((std::get(input) != ir::ModelIndex{0}) || + (std::get(input) != ir::SubgraphIndex{0}) || + (std::get(input) != ir::IOIndex{i})) + { + throw std::runtime_error{"NYI: Support package input to 1st model with same order"}; + } + } + + // Assume all package outputs are 1:0:x + for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++) + { + auto output = _model_edges->pkg_outputs[i]; + if ((std::get(output) != ir::ModelIndex{1}) || + (std::get(output) != ir::SubgraphIndex{0}) || + (std::get(output) != ir::IOIndex{i})) + { + throw std::runtime_error{"NYI: Support package output from 2nd model with same order"}; + } + } + + const auto &executor1 = _executors.at(ir::SubgraphIndex{0}); + const auto &graph1 = executor1->graph(); + const auto &executor2 = _executors.at(ir::SubgraphIndex{1}); + const auto &graph2 = executor2->graph(); + + if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) || + (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) || + (graph1.getOutputs().size() != graph2.getInputs().size()) || + (graph1.getOutputs().size() != _model_edges->edges.size())) + { + throw std::runtime_error{"NYI: Unsupported model edge pattern"}; + } + + // Prepare buffer + // Assume 
buffer layout is NHWC + std::vector> bufs(_model_edges->edges.size()); + std::vector buf_infos(_model_edges->edges.size()); + const auto layout = ir::Layout::NHWC; + + for (uint32_t i = 0; i < graph1.getOutputs().size(); i++) + { + const auto buf_index = + _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i}); + buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info(); + const auto buf_size = buf_infos[i]->total_size(); + bufs[i] = std::make_unique(buf_size); + } + + // 1st executor + { + IODescription desc1; + const auto input_size = graph1.getInputs().size(); + const auto output_size = graph1.getOutputs().size(); + desc1.inputs.resize(input_size); + desc1.outputs.resize(output_size); + for (uint32_t i = 0; i < input_size; i++) + desc1.inputs[i] = std::make_unique(*desc.inputs[i].get()); + for (uint32_t i = 0; i < output_size; i++) + desc1.outputs[i] = std::make_unique(*buf_infos[i], bufs[i].get(), + buf_infos[i]->total_size(), layout); + + executor1->execute(desc1); + } + + // 2nd executor + { + IODescription desc2; + const auto input_size = graph2.getInputs().size(); + const auto output_size = graph2.getOutputs().size(); + desc2.inputs.resize(input_size); + desc2.outputs.resize(output_size); + for (uint32_t i = 0; i < input_size; i++) + desc2.inputs[i] = std::make_unique(*buf_infos[i], bufs[i].get(), + buf_infos[i]->total_size(), layout); + for (uint32_t i = 0; i < output_size; i++) + desc2.outputs[i] = std::make_unique(*desc.outputs[i].get()); + + executor2->execute(desc2); + } +} + +} // namespace exec +} // namespace onert diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index df68b1b..f87c271 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -34,9 +34,7 @@ void FunctionSequence::run() // Thus, those two bakends cannot reach here. 
// Do dynamic shape inference - auto op_ind = _dynamic_tensor_ctx->op_ind; - auto &op = _dynamic_tensor_ctx->operations->at(op_ind); - op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); + _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer); for (const auto &function : _functions) { diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index b29216a..d149345 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "exec/JSONExecTime.h" -#include "backend/IConfig.h" +#include "JSONExecTime.h" + #include namespace onert diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 39d6531..a833466 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -22,11 +22,10 @@ #ifndef __ONERT_EXEC_EXECUTOR_H_ #define __ONERT_EXEC_EXECUTOR_H_ -#include "ir/Index.h" #include "ExecutorBase.h" -#include "compiler/Linear.h" -#include "exec/FunctionSequence.h" + #include "compiler/CodeMap.h" +#include "ir/Index.h" #include "util/TracingCtx.h" namespace onert diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 7f107fa..7d459b0 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -17,19 +17,13 @@ #ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__ #define __ONERT_EXEC_PARALLEL_EXECUTOR_H__ -#include -#include -#include - -#include "exec/FunctionSequence.h" -#include "Job.h" -#include "ir/OperandIndexSequence.h" -#include "ir/Index.h" -#include -#include "exec/DataflowExecutor.h" +#include "DataflowExecutor.h" #include "ParallelScheduler.h" + #include "util/TracingCtx.h" +#include + namespace onert { namespace exec diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h 
b/runtime/onert/core/src/exec/feature/MockTensor.h new file mode 100644 index 0000000..1d2d375 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/MockTensor.h @@ -0,0 +1,66 @@ + +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/ITensor.h" + +template class MockTensor : public onert::backend::ITensor +{ +public: + MockTensor(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout) + : _buf(reinterpret_cast(buf)), _shape(shape), _layout(layout) + { + } + +public: + uint8_t *buffer() const override { return _buf; } + + size_t calcOffset(const onert::ir::Coordinates &coords) const override + { + size_t rank = _shape.rank(); + rank = rank == 0 ? 1 : rank; + size_t offset = 0; + for (size_t i = 0; i < rank; ++i) + { + auto dim = _shape.rank() == 0 ? 
1 : _shape.dim(i); + offset = offset * dim + coords[i]; + } + offset *= sizeof(T); + + return offset; + } + + onert::ir::Shape getShape() const override { return _shape; } + +public: // DUMMY methods + size_t total_size() const override { return 0; } + onert::ir::Layout layout() const override { return _layout; } + onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; } + float data_scale() const override { return 0; } + int32_t data_zero_point() const override { return 0; } + const std::vector &data_scales() const override { return _dummy_scales; } + const std::vector &data_zero_points() const override { return _dummy_zerops; } + bool has_padding() const override { return false; } + void access(const std::function &fn) override {} + bool is_dynamic() const override { return false; } + +private: + uint8_t *_buf = nullptr; + onert::ir::Shape _shape; + onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN; + std::vector _dummy_scales; + std::vector _dummy_zerops; +}; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc new file mode 100644 index 0000000..f439caf --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include + +using namespace onert::exec::feature; + +template class Reader_nchw : public testing::Test +{ +public: + void setData(std::initializer_list list) { _data = std::make_shared>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared>(shape, _data->data(), onert::ir::Layout::NCHW); + _reader = std::make_shared>(_tensor.get()); + } + + std::shared_ptr> _reader = nullptr; + +private: + std::shared_ptr> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types; +TYPED_TEST_SUITE(Reader_nchw, ReaderTypes); + +TYPED_TEST(Reader_nchw, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createReader(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc new file mode 100644 index 0000000..c6dcda7 --- /dev/null +++ 
b/runtime/onert/core/src/exec/feature/nchw/View.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "View.h" + +#include "../MockTensor.h" + +#include + +using namespace onert::exec::feature; + +template class View_nchw : public testing::Test +{ +public: + void setData(std::initializer_list list) { _data = std::make_shared>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared>(shape, _data->data(), onert::ir::Layout::NCHW); + _view = std::make_shared>(_tensor.get()); + } + + std::shared_ptr> _view = nullptr; + +private: + std::shared_ptr> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types; +TYPED_TEST_SUITE(View_nchw, ViewTypes); + 
+TYPED_TEST(View_nchw, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 6, 2, 1); + this->createView(); + + // Data: NCHW + // Shape: NCHW + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NCHW + // Shape: NCHW + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc new file mode 100644 index 0000000..7731990 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Reader.h" + +#include "../MockTensor.h" + +#include + +using namespace onert::exec::feature; + +template class Reader_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list list) { _data = std::make_shared>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createReader() + { + _reader = + std::make_shared>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared>(shape, _data->data(), onert::ir::Layout::NHWC); + _reader = std::make_shared>(_tensor.get()); + } + + std::shared_ptr> _reader = nullptr; + +private: + std::shared_ptr> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr> _tensor = nullptr; +}; + +using ReaderTypes = ::testing::Types; +TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes); +TYPED_TEST_SUITE(MockTensorReader_nhwc, ReaderTypes); + +TYPED_TEST(Reader_nhwc, basic_reader) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createReader(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_reader->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_reader->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h index 40d1d23..c98d050 100644 --- 
a/runtime/onert/core/src/exec/feature/nhwc/View.h +++ b/runtime/onert/core/src/exec/feature/nhwc/View.h @@ -17,7 +17,7 @@ #ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ #define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__ -#include "../Reader.h" +#include "Reader.h" #include #include diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc new file mode 100644 index 0000000..bdd73d5 --- /dev/null +++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "View.h" + +#include "../MockTensor.h" + +#include + +using namespace onert::exec::feature; + +template class View_nhwc : public testing::Test +{ +public: + void setData(std::initializer_list list) { _data = std::make_shared>(list); } + + void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + _shape = onert::ir::FeatureShape(batch, depth, height, width); + } + + void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width) + { + auto elem_size = sizeof(T); + _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size, + width * elem_size); + } + + void createView() + { + _view = + std::make_shared>(_shape, _stride, _data->data(), _data->size() * sizeof(T)); + } + + void createUsingMockTensor() + { + onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C}; + _tensor = std::make_shared>(shape, _data->data(), onert::ir::Layout::NHWC); + _view = std::make_shared>(_tensor.get()); + } + + std::shared_ptr> _view = nullptr; + +private: + std::shared_ptr> _data = nullptr; + onert::ir::FeatureShape _shape; + onert::ir::FeatureShape _stride; + std::shared_ptr> _tensor = nullptr; +}; + +using ViewTypes = ::testing::Types; +TYPED_TEST_SUITE(View_nhwc, ViewTypes); +TYPED_TEST_SUITE(MockTensorView_nhwc, ViewTypes); + +TYPED_TEST(View_nhwc, basic_view) +{ + this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + this->setShape(1, 2, 3, 2); + this->setStride(12, 1, 6, 2); + this->createView(); + + // Data: NCHW + // Shape: NHWC + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8); + ASSERT_EQ(this->_view->at(1, 1, 0), 8); + + // Data: NHWC + // Shape: NHWC + this->createUsingMockTensor(); + + ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6); + ASSERT_EQ(this->_view->at(1, 1, 0), 6); +} diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc index 44d1575..f047771 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.cc +++ 
b/runtime/onert/core/src/interp/InterpExecutor.cc @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "interp/InterpExecutor.h" -#include "interp/ExecEnv.h" -#include "interp/Interpreter.h" +#include "InterpExecutor.h" + +#include "ExecEnv.h" +#include "Interpreter.h" #include "util/logging.h" diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h index df6153d..d6d5dd0 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ b/runtime/onert/core/src/interp/InterpExecutor.h @@ -74,7 +74,12 @@ public: } private: - const ir::Graph &_graph; + /** + * @brief Copy of target graph for lowering + * @note It uses copy of graph, not reference. + * Original graph may be deallocated by frontend. + */ + const ir::Graph _graph; ir::OperandIndexMap> _tensor_map; }; diff --git a/runtime/onert/core/src/interp/InterpExecutor.test.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc new file mode 100644 index 0000000..9f95ffe --- /dev/null +++ b/runtime/onert/core/src/interp/InterpExecutor.test.cc @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "InterpExecutor.h" + +#include "exec/Execution.h" +#include "ir/Graph.h" +#include "ir/operation/BinaryArithmetic.h" + +#include + +#include + +namespace +{ + +using namespace onert::ir; +using InterpExecutor = onert::interp::InterpExecutor; +using Execution = onert::exec::Execution; +using Executors = onert::exec::Executors; + +class InterpExecutorTest : public ::testing::Test +{ +protected: + virtual void SetUp() {} + void CreateSimpleModel() + { + // Model: one elementwise add operation + // model input: lhs, rhs + // model output: add result + // lhs, rhs, result shape: {1, 2, 2, 1} + // activation: none (constant) + _graph = std::make_unique(); + + // Add operands + + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs = _graph->addOperand(shape, type); + auto operand_result = _graph->addOperand(shape, type); + + // Add operations + + operation::BinaryArithmetic::Param param; + param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param.activation = Activation::NONE; + auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; + auto output_set = OperandIndexSequence{operand_result}; + _graph->addOperation( + std::make_unique(input_set, output_set, param)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs); + _graph->getOutputs().append(operand_result); + + _graph->verify(); + + auto model = std::make_shared(); + model->push(onert::ir::SubgraphIndex{0}, _graph); + + _executors = std::make_shared(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph)); + } + + void CreateTwoStepModel() + { + // Model: two elementwise add operation + // model input: lhs, rhs1 + // model output: second add result (result2) + // constant: rhs2 + // result1 <= (lhs + rhs) + // result2 <= 
(result1 + rhs2) + // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} + // activation: none (constant) + _graph = std::make_unique(); + + // 1st add operands (result1 <= lhs + rhs1) + + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + static int32_t rhs2_data[4] = {3, 1, -1, 5}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs1 = _graph->addOperand(shape, type); + auto operand_result1 = _graph->addOperand(shape, type); + auto operand_rhs2 = _graph->addOperand(shape, type); + auto operand_result2 = _graph->addOperand(shape, type); + _graph->operands() + .at(operand_rhs2) + .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); + + // 2nd add operations (result2 <= result1 + rhs2) + + operation::BinaryArithmetic::Param param1; + param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param1.activation = Activation::NONE; + auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; + auto output_set1 = OperandIndexSequence{operand_result1}; + _graph->addOperation( + std::make_unique(input_set1, output_set1, param1)); + + operation::BinaryArithmetic::Param param2; + param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param2.activation = Activation::NONE; + auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; + auto output_set2 = OperandIndexSequence{operand_result2}; + _graph->addOperation( + std::make_unique(input_set2, output_set2, param2)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs1); + _graph->getOutputs().append(operand_result2); + + _graph->verify(); + + auto model = std::make_shared(); + model->push(onert::ir::SubgraphIndex{0}, _graph); + + _executors = std::make_shared(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph)); + } + + void 
CreateUnspecifiedDimensionsModel() + { + // Model: one elementwise add operation + // model input: lhs, rhs + // model output: add result + // lhs, rhs, result shape: {1, unknown, 2, 1} + // activation: none (constant) + _graph = std::make_unique(); + + // Add operands + + Shape shape{1, 0, 2, 1}; + TypeInfo type{DataType::INT32}; + Shape shape_scalar(0); + TypeInfo type_scalar{DataType::INT32}; + + auto operand_lhs = _graph->addOperand(shape, type); + auto operand_rhs = _graph->addOperand(shape, type); + + auto operand_activation = _graph->addOperand(shape_scalar, type_scalar); + _graph->operands() + .at(operand_activation) + .data(std::make_unique(reinterpret_cast(&_activation_value), 4)); + + auto operand_result = _graph->addOperand(shape, type); + + // Add operations + + operation::BinaryArithmetic::Param param; + param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param.activation = Activation::NONE; + auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; + auto output_set = OperandIndexSequence{operand_result}; + _graph->addOperation( + std::make_unique(input_set, output_set, param)); + + // Identify model inputs and outputs + + _graph->getInputs().append(operand_lhs); + _graph->getInputs().append(operand_rhs); + _graph->getOutputs().append(operand_result); + + _graph->verify(); + + auto model = std::make_shared(); + model->push(onert::ir::SubgraphIndex{0}, _graph); + + _executors = std::make_shared(); + _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph)); + } + + void createExecution() { _execution = std::make_unique(_executors); } + + virtual void TearDown() { _executors = nullptr; } + + std::shared_ptr _graph{nullptr}; + std::shared_ptr _executors{nullptr}; + std::unique_ptr _execution{nullptr}; + const int32_t _activation_value{0}; +}; + +TEST_F(InterpExecutorTest, create_empty) +{ + Graph graph; + graph.verify(); + auto executor = std::make_unique(graph); + ASSERT_NE(executor, nullptr); +} + 
+TEST_F(InterpExecutorTest, create_simple) +{ + CreateSimpleModel(); + ASSERT_NE(_executors, nullptr); + ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr); +} + +TEST_F(InterpExecutorTest, neg_setInput) +{ + CreateSimpleModel(); + createExecution(); + + auto input1 = IOIndex{0}; + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + + EXPECT_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); +} + +TEST_F(InterpExecutorTest, neg_setOutput) +{ + CreateSimpleModel(); + createExecution(); + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + EXPECT_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); +} + +TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions) +{ + CreateUnspecifiedDimensionsModel(); + createExecution(); + + auto input1 = IOIndex{0}; + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + + TypeInfo operand_type{DataType::INT32}; + Shape operand_shape{1, 2, 2, 1}; + + EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, + reinterpret_cast(input1_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, + reinterpret_cast(input1_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape, + reinterpret_cast(input1_buffer), 16)); +} + +TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions) +{ + CreateUnspecifiedDimensionsModel(); + createExecution(); + + auto output = 
IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + TypeInfo operand_type{DataType::INT32}; + Shape operand_shape{1, 2, 2, 1}; + + int32_t output_buffer[4] = {}; + + EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast(output_buffer), 4), + std::runtime_error); + EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast(output_buffer), 12), + std::runtime_error); + EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape, + reinterpret_cast(output_buffer), 16)); +} + +TEST_F(InterpExecutorTest, execute) +{ + CreateSimpleModel(); + createExecution(); + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto input1_idx = _graph->getInputs().at(input1); + auto input2_idx = _graph->getInputs().at(input2); + + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + const int32_t input2_buffer[4] = {1, -3, 2, -4}; + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); + EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast(input2_buffer), 16)); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); + EXPECT_NO_THROW(_execution->execute()); + EXPECT_EQ(output_buffer[0], 2); + EXPECT_EQ(output_buffer[1], -3); + EXPECT_EQ(output_buffer[2], 1); + EXPECT_EQ(output_buffer[3], -6); +} + +TEST_F(InterpExecutorTest, executeTwoStep) +{ + CreateTwoStepModel(); + createExecution(); + + auto input1 = IOIndex{0}; + auto input2 = IOIndex{1}; + auto input1_idx = _graph->getInputs().at(input1); + auto input2_idx = _graph->getInputs().at(input2); + + const int32_t input1_buffer[4] = {1, 0, -1, -2}; + const int32_t input2_buffer[4] = {1, -3, 2, -4}; + + auto output = IOIndex{0}; + auto output_idx = _graph->getOutputs().at(output); + + int32_t output_buffer[4] = {}; + + 
EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); + EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast(input2_buffer), 16)); + EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); + EXPECT_NO_THROW(_execution->execute()); + EXPECT_EQ(output_buffer[0], 5); + EXPECT_EQ(output_buffer[1], -2); + EXPECT_EQ(output_buffer[2], 0); + EXPECT_EQ(output_buffer[3], -1); +} + +} // namespace diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc index 804e9fb..fe4acd3 100644 --- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc +++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/BinaryArithmetic.h" -#include "misc/polymorphic_downcast.h" -#include "cker/Types.h" + +#include +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc index a063ab1..1036046 100644 --- a/runtime/onert/core/src/interp/operations/Concat.cc +++ b/runtime/onert/core/src/interp/operations/Concat.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Concat.h" -#include "misc/polymorphic_downcast.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc index 0b43a47..72c2057 100644 --- a/runtime/onert/core/src/interp/operations/Conv2D.cc +++ b/runtime/onert/core/src/interp/operations/Conv2D.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Conv2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" +#include "util/Utils.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc index d1c62d7..9f52744 100644 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/DepthwiseConv2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" +#include "util/Utils.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc index 197855f..e13080e 100644 --- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc +++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc @@ -14,17 +14,16 @@ * limitations under the License. */ -#include - #include "OperationUtil.h" - -#include "interp/Registration.h" +#include "../Registration.h" #include "ir/operation/ElementwiseActivation.h" -#include #include #include +#include + +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc index ef82760..2bc9f51 100644 --- a/runtime/onert/core/src/interp/operations/FullyConnected.cc +++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/FullyConnected.h" -#include "misc/polymorphic_downcast.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc index 0ea6087..d686cfc 100644 --- a/runtime/onert/core/src/interp/operations/Gather.cc +++ b/runtime/onert/core/src/interp/operations/Gather.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Gather.h" -#include "misc/polymorphic_downcast.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc index b5c3881..3180884 100644 --- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc +++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/InstanceNorm.h" -#include "misc/polymorphic_downcast.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc index 0eec7fe..3db0828 100644 --- a/runtime/onert/core/src/interp/operations/Pad.cc +++ b/runtime/onert/core/src/interp/operations/Pad.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Pad.h" +#include + namespace onert { namespace interp diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc index 2f3b716..3935d47 100644 --- a/runtime/onert/core/src/interp/operations/Pool2D.cc +++ b/runtime/onert/core/src/interp/operations/Pool2D.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Pool2D.h" -#include "util/Utils.h" #include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" +#include "util/Utils.h" + +#include +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc index 3a11845..1de5a57 100644 --- a/runtime/onert/core/src/interp/operations/Reshape.cc +++ b/runtime/onert/core/src/interp/operations/Reshape.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "interp/Registration.h" +#include "../Registration.h" namespace onert { diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc index 1fc3031..8be2f22 100644 --- a/runtime/onert/core/src/interp/operations/Softmax.cc +++ b/runtime/onert/core/src/interp/operations/Softmax.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/Softmax.h" -#include "misc/polymorphic_downcast.h" + +#include +#include namespace onert { diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc index 755103d..59c8e8c 100644 --- a/runtime/onert/core/src/interp/operations/TransposeConv.cc +++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include -#include - #include "OperationUtil.h" +#include "../Registration.h" -#include "interp/Registration.h" #include "ir/operation/TransposeConv.h" +#include +#include + namespace onert { namespace interp diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index df30bbd..28cf413 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -17,19 +17,9 @@ #include "ir/Graph.h" #include "OperationValidator.h" +#include "verifier/Verifier.h" -#include - -#include -#include - -#include "util/logging.h" #include "util/Set.h" -#include "verifier/Verifier.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperationIndexMap.h" -#include "dumper/text/GraphDumper.h" -#include "backend/IConfig.h" namespace onert { @@ -38,6 +28,8 @@ namespace ir Graph::Graph() = default; +Graph::Graph(const Graph &) = default; + Graph::~Graph(void) = default; OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type) diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc new file mode 100644 index 0000000..1445007 --- /dev/null +++ b/runtime/onert/core/src/ir/Graph.test.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" +#include "ir/operation/BinaryArithmetic.h" + +#include + +TEST(Graph, neg_inputs_and_outputs) +{ + onert::ir::Graph graph; + + onert::ir::OperandIndex index0{0u}; + onert::ir::OperandIndex index1{1u}; + + graph.addInput({index0}); + graph.addInput({index1}); + + onert::ir::OperandIndex index10{10u}; + onert::ir::OperandIndex index11{11u}; + onert::ir::OperandIndex index12{12u}; + + graph.addOutput({index10}); + graph.addOutput({index11}); + graph.addOutput({index12}); + + ASSERT_EQ(graph.getInputs().size(), 2); + ASSERT_EQ(graph.getOutputs().size(), 3); + + onert::ir::IOIndex io_index0{0}; + onert::ir::IOIndex io_index1{1}; + onert::ir::IOIndex io_index2{2}; + + ASSERT_EQ(graph.getInputs().at(io_index0), 0); + ASSERT_EQ(graph.getInputs().at(io_index1), 1); + + ASSERT_EQ(graph.getOutputs().at(io_index0), 10); + ASSERT_EQ(graph.getOutputs().at(io_index1), 11); + ASSERT_EQ(graph.getOutputs().at(io_index2), 12); + + EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range); +} + +using namespace onert::ir; + +OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs, + const OperandIndexSequence outputs) +{ + // Add "ADD" operation + operation::BinaryArithmetic::Param param; + param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; + param.activation = Activation::NONE; + return graph.addOperation(std::make_unique(inputs, outputs, param)); +} + +TEST(Graph, OneOpGraphSimpleValid) +{ + // Simple Graph with just one Add operation + + Graph graph; + + // Add tensors 
+ Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto lhs = graph.addOperand(shape, type); + auto rhs = graph.addOperand(shape, type); + auto res = graph.addOperand(shape, type); + + addAddOperation(graph, {lhs, rhs}, {res}); + + // Set model inputs/outputs + graph.addInput(lhs); + graph.addInput(rhs); + graph.addOutput(res); + + graph.verify(); + + SUCCEED(); +} + +TEST(Graph, neg_InvalidGraph_BadInput) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + graph.addInput(OperandIndex{89}); // Non-exisiting operand! + + EXPECT_ANY_THROW(graph.verify()); +} + +TEST(Graph, neg_InvalidGraph_BadOutput) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto in = graph.addOperand(shape, type); + auto out = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(in); + graph.addOutput(out); + graph.addOutput(OperandIndex{12}); // Non-exisiting operand! + + EXPECT_ANY_THROW(graph.verify()); +} + +TEST(Graph, neg_InvalidAddOperation_BadInputIndex) +{ + Graph graph; + + // Add tensors + Shape shape{1, 2, 2, 1}; + TypeInfo type{DataType::FLOAT32}; + auto lhs = graph.addOperand(shape, type); + auto rhs = graph.addOperand(shape, type); + auto res = graph.addOperand(shape, type); + + // Set model inputs/outputs + graph.addInput(lhs); + graph.addInput(rhs); + graph.addOutput(res); + + ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid()); +} diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc new file mode 100644 index 0000000..fc956ab --- /dev/null +++ b/runtime/onert/core/src/ir/LayoutSet.test.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LayoutSet.h" + +#include + +using onert::ir::Layout; +using onert::ir::LayoutSet; + +TEST(ir_LayoutSet, neg_add_remove) +{ + LayoutSet set{Layout::NCHW}; + set.remove(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 2); + set.remove(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.remove(Layout::NCHW); + ASSERT_EQ(set.size(), 0); + set.remove(Layout::NCHW); + ASSERT_EQ(set.size(), 0); +} + +TEST(ir_LayoutSet, neg_add_twice) +{ + LayoutSet set; + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 1); + set.add(Layout::NHWC); + ASSERT_EQ(set.size(), 1); +} + +TEST(ir_LayoutSet, set_operators) +{ + LayoutSet set1{Layout::NCHW}; + LayoutSet set2{Layout::NHWC}; + LayoutSet set3 = set1 | set2; + + ASSERT_EQ(set3.size(), 2); + + ASSERT_EQ((set3 - set1).size(), 1); + ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true); + ASSERT_EQ((set3 - set2).size(), 1); + ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true); + ASSERT_EQ((set3 - set3).size(), 0); + + ASSERT_EQ((set3 & set1).size(), 1); + ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true); + ASSERT_EQ((set3 & set2).size(), 1); + ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true); + ASSERT_EQ((set1 & set2).size(), 0); +} diff --git a/runtime/onert/test/core/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h similarity index 100% rename from runtime/onert/test/core/ir/MockNode.h 
rename to runtime/onert/core/src/ir/MockNode.h diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc new file mode 100644 index 0000000..0b85879 --- /dev/null +++ b/runtime/onert/core/src/ir/Operand.test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" + +#include "MockNode.h" +#include "verifier/Verifier.h" + +#include + +#include +#include + +namespace +{ + +using IndexSet = onert::ir::OperandIndexSequence; +using Mock = onert_test::ir::SimpleMock; + +} // namespace + +TEST(ir_Operand, neg_usedef) +{ + onert::ir::Graph graph; + onert::ir::verifier::DAGChecker verifier; + + onert::ir::Shape shape(3); + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + // Model Input/Output + auto input_operand = graph.addOperand(shape, type); + auto output_operand = graph.addOperand(shape, type); + + graph.addInput(input_operand); + graph.addOutput(output_operand); + + // MockNode1 + auto operand_index1 = graph.addOperand(shape, type); + auto mocknode_index1 = + graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index1})); + + // MockNode2 + auto operand_index2 = graph.addOperand(shape, type); + auto mocknode_index2 = + graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index2})); + + // MockNode3(two input) + auto multiinput_index = 
graph.addOperation( + std::make_unique(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); + + graph.verify(); + + ASSERT_TRUE(verifier.verify(graph)); + + // Check def + ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1); + ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2); + ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index); + + ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2); + ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index); + + // Check use + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true); + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true); + ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true); + ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true); + + ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2); + ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1); + ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0); +} diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc new file mode 100644 index 0000000..588c4e4 --- /dev/null +++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/OperandIndexSequence.h" + +#include + +using onert::ir::OperandIndex; +using onert::ir::OperandIndexSequence; + +TEST(ir_OperandIndexSequence, neg_append) +{ + OperandIndexSequence iset{0, 2, 4, 8}; + + ASSERT_EQ(iset.size(), 4); + + iset.append(OperandIndex{10}); + + ASSERT_EQ(iset.size(), 5); + + onert::ir::IOIndex index1{1}; + onert::ir::IOIndex index2{4}; + + ASSERT_EQ(iset.at(index1), 2); + ASSERT_EQ(iset.at(index2), 10); + + ASSERT_TRUE(iset.contains(OperandIndex{2})); + ASSERT_TRUE(iset.contains(OperandIndex{10})); + ASSERT_FALSE(iset.contains(OperandIndex{11})); +} + +TEST(graph_OperandIndexSequence, neg_replace) +{ + OperandIndexSequence iset{0, 1, 2, 3}; + + iset.replace(OperandIndex{1}, OperandIndex{9}); + ASSERT_FALSE(iset.contains(OperandIndex{1})); + ASSERT_TRUE(iset.contains(OperandIndex{9})); +} diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc new file mode 100644 index 0000000..aff228b --- /dev/null +++ b/runtime/onert/core/src/ir/Operands.test.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Operands.h" + +#include + +TEST(ir_Operands, neg_set_test) +{ + onert::ir::Operands set; + + onert::ir::Shape shape0{1, 2, 3}; + + onert::ir::Shape shape1(4); + shape1.dim(0) = 10; + shape1.dim(1) = 20; + shape1.dim(2) = 30; + shape1.dim(3) = 40; + + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + set.emplace(shape0, type); + set.emplace(shape1, type); + + ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true); + ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true); + ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false); + + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1); + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2); + ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3); +} diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc new file mode 100644 index 0000000..b3c4e85 --- /dev/null +++ b/runtime/onert/core/src/ir/Operation.test.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/OperandIndexSequence.h" +#include "ir/operation/Concat.h" +#include "ir/operation/Conv2D.h" + +#include + +#include +#include + +using Index = onert::ir::IOIndex; +using IndexSet = onert::ir::OperandIndexSequence; + +TEST(ir_Operation_setIO, operation_setIO_conv) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + // Add Conv + using Graph = onert::ir::operation::Conv2D; + + auto input_operand = graph.addOperand(shape, type); + auto kernel_operand = graph.addOperand(shape, type); + auto bias_operand = graph.addOperand(shape, type); + IndexSet inputs{input_operand, kernel_operand, bias_operand}; + + Graph::Param conv_params; + conv_params.padding.type = onert::ir::PaddingType::SAME; + conv_params.stride.horizontal = 1; + conv_params.stride.vertical = 1; + conv_params.activation = onert::ir::Activation::NONE; + + auto output_operand = graph.addOperand(shape, type).value(); + IndexSet outputs{output_operand}; + + auto conv = std::make_unique(inputs, outputs, conv_params); + + ASSERT_NE(conv, nullptr); + ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); + conv->setInputs({8, 9, 10}); + ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); + ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8); +} + +TEST(ir_Operation_setIO, neg_operation_setIO_concat) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + using Graph = onert::ir::operation::Concat; + + // Add Concat + IndexSet inputs; + for (int i = 0; i < 6; ++i) + { + inputs.append(graph.addOperand(shape, type)); + } + + Graph::Param concat_params{0}; + + auto output_operand = graph.addOperand(shape, type).value(); + IndexSet outputs{output_operand}; + + auto concat = 
std::make_unique(inputs, outputs, concat_params); + + ASSERT_NE(concat, nullptr); + ASSERT_EQ(concat->getInputs().size(), 6); + ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); + + concat->setInputs({80, 6, 9, 11}); + ASSERT_EQ(concat->getInputs().size(), 4); + ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); + ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80); + ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9); + ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range); +} diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc new file mode 100644 index 0000000..e578726 --- /dev/null +++ b/runtime/onert/core/src/ir/Operations.test.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/Operations.h" + +#include "MockNode.h" + +#include + +using onert::ir::Operation; +using onert::ir::OperationIndex; +using onert::ir::Operations; + +TEST(ir_Operations, basic) +{ + Operations ops; + ops.push(std::unique_ptr(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); + OperationIndex idx{0u}; + ASSERT_EQ(ops.at(idx).getInputs().size(), 4); + ASSERT_EQ(ops.at(idx).getOutputs().size(), 3); +} + +TEST(ir_Operations, neg_at) +{ + Operations ops; + ops.push(std::unique_ptr(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); + OperationIndex idx{99u}; + EXPECT_THROW(ops.at(idx), std::out_of_range); +} diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc new file mode 100644 index 0000000..afdb292 --- /dev/null +++ b/runtime/onert/core/src/ir/Shape.test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/Shape.h" + +#include + +TEST(ShapeTest, basic_test) +{ + { + onert::ir::Shape shape(3); + + shape.dim(0) = 1; + shape.dim(1) = 2; + shape.dim(2) = 3; + + ASSERT_EQ(shape.rank(), 3); + ASSERT_EQ(shape.num_elements(), 6); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); + ASSERT_EQ(shape.hasUnspecifiedDims(), false); + } + { + onert::ir::Shape shape; // scalar or rank is unspecified + + ASSERT_EQ(shape.rank(), 0); + ASSERT_EQ(shape.num_elements(), 1); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true); + ASSERT_EQ(shape.hasUnspecifiedDims(), false); + } +} + +TEST(ShapeTest, neg_basic_test) +{ + { + onert::ir::Shape shape(2); + + shape.dim(0) = 1; + shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM; + + ASSERT_EQ(shape.rank(), 2); + ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); + ASSERT_EQ(shape.hasUnspecifiedDims(), true); + EXPECT_ANY_THROW(shape.num_elements()); + } +} diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc new file mode 100644 index 0000000..1ec71cd --- /dev/null +++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Verifier.h" + +#include "../MockNode.h" + +#include "ir/Graph.h" + +#include + +#include + +using IndexSet = onert::ir::OperandIndexSequence; +using Mock = onert_test::ir::SimpleMock; + +TEST(Verifier, dag_checker) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + graph.addOperation(std::make_unique(IndexSet{operand1}, IndexSet{operand2})); + + onert::ir::verifier::DAGChecker verifier; + + ASSERT_TRUE(verifier.verify(graph)); +} + +TEST(Verifier, neg_edge_consistency_checker_1) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + auto mock_op = std::make_unique(IndexSet{operand1}, IndexSet{operand2}); + auto op_ind = graph.addOperation(std::move(mock_op)); + + graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone + + onert::ir::verifier::EdgeChecker verifier; + ASSERT_FALSE(verifier.verify(graph)); +} + +TEST(Verifier, neg_edge_consistency_checker_2) +{ + onert::ir::Graph graph; + + onert::ir::Shape shape{3}; + onert::ir::TypeInfo type{onert::ir::DataType::INT32}; + + auto operand1 = graph.addOperand(shape, type); + auto operand2 = graph.addOperand(shape, type); + + graph.addInput(operand1); + graph.addOutput(operand2); + + auto mock_op = std::make_unique(IndexSet{operand1}, IndexSet{operand2}); + auto mock_op_ptr = mock_op.get(); + auto op_ind = graph.addOperation(std::move(mock_op)); + + mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone + + onert::ir::verifier::EdgeChecker verifier; + ASSERT_FALSE(verifier.verify(graph)); +} diff --git 
a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc index 3fc0c8e..d868efe 100644 --- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc +++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include -#include #include +#include #include +#include // json type for ChromeTracingWriter namespace diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 9da93f6..b7fcefc 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -15,13 +15,15 @@ */ #include "util/ConfigSource.h" -#include "util/GeneralConfigSource.h" -#include "util/EnvConfigSource.h" +#include "util/logging.h" + +#include +#include +#include -#include #include +#include #include - #include namespace onert @@ -29,12 +31,27 @@ namespace onert namespace util { +using namespace nnfw::misc; + static std::unique_ptr _source; static std::unique_ptr _source_ext; void config_source(std::unique_ptr &&source) { _source = std::move(source); } void config_source_ext(std::unique_ptr &&source) { _source_ext = std::move(source); } +void setConfigKeyValues(const CfgKeyValues &keyValues) +{ + auto configsrc = std::make_unique(); + + for (auto it = keyValues.begin(); it != keyValues.end(); ++it) + { + VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; + configsrc->set(it->first, it->second); + } + + onert::util::config_source_ext(std::move(configsrc)); +} + static IConfigSource *config_source() { if (!_source) diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc deleted file mode 100644 index 0d25b73..0000000 --- a/runtime/onert/core/src/util/EnvConfigSource.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics 
Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/EnvConfigSource.h" - -#include - -namespace onert -{ -namespace util -{ - -std::string EnvConfigSource::get(const std::string &key) const -{ - const char *value = std::getenv(key.c_str()); - if (value != nullptr) - { - return value; - } - else - { - return GeneralConfigSource::get(key); - } -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index 83c2649..c1b9c43 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "util/EventCollector.h" +#include "EventCollector.h" // C++ standard libraries #include diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 774fe05..effb723 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -17,12 +17,13 @@ #ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__ #define __ONERT_UTIL_EVENT_COLLECTOR_H__ -#include "util/EventRecorder.h" +#include "EventRecorder.h" + #include "util/TracingCtx.h" -#include -#include #include +#include +#include class EventCollector { diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc index 5d3d5f5..85a588d 100644 --- a/runtime/onert/core/src/util/EventRecorder.cc +++ b/runtime/onert/core/src/util/EventRecorder.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "util/EventRecorder.h" +#include "EventRecorder.h" void EventRecorder::emit(std::unique_ptr &&evt) { diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc index c42c537..ca4bd30 100644 --- a/runtime/onert/core/src/util/EventWriter.cc +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" #include diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc deleted file mode 100644 index 7d2757e..0000000 --- a/runtime/onert/core/src/util/GeneralConfigSource.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/GeneralConfigSource.h" -#include "util/logging.h" - -namespace onert -{ -namespace util -{ - -std::string GeneralConfigSource::get(const std::string &key) const -{ - auto itr = _map.find(key); - if (itr == _map.end()) - { - return ""; - } - else - { - return itr->second; - } -} - -void GeneralConfigSource::set(const std::string &key, const std::string &val) -{ - VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl; - _map[key] = val; -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/Index.test.cc b/runtime/onert/core/src/util/Index.test.cc new file mode 100644 index 0000000..ff73e5e --- /dev/null +++ b/runtime/onert/core/src/util/Index.test.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/Index.h" + +#include + +using Index = ::onert::util::Index; + +TEST(Index, neg_index_test) +{ + Index idx1{1u}; + Index idx2{2u}; + Index idx3{idx1}; + + ASSERT_EQ(idx1, 1); + ASSERT_EQ(idx1, 1u); + ASSERT_EQ(idx1.value(), 1u); + ASSERT_NE(idx1, idx2); + ASSERT_EQ(idx1, idx3); +} diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc index b7fbac5..7a8b9f2 100644 --- a/runtime/onert/core/src/util/MDTableEventWriter.cc +++ b/runtime/onert/core/src/util/MDTableEventWriter.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include -#include -#include #include -#include #include #include +#include #include +#include +#include +#include // md table type namespace diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc new file mode 100644 index 0000000..3fe7357 --- /dev/null +++ b/runtime/onert/core/src/util/ObjectManager.test.cc @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/Index.h" +#include "util/ObjectManager.h" + +#include + +using namespace onert; + +struct TestTag; +using Index = typename util::Index; + +TEST(ObjectManager, emplace) +{ + util::ObjectManager man; + + auto index = man.emplace(100); + ASSERT_EQ(man.at(index), 100); +} + +TEST(ObjectManager, neg_remove_1) +{ + util::ObjectManager man; + + Index index = man.emplace(100); + ASSERT_TRUE(man.exist(index)); + ASSERT_EQ(man.at(index), 100); + + man.remove(index); + ASSERT_FALSE(man.exist(index)); +} + +TEST(ObjectManager, neg_remove_2) +{ + util::ObjectManager man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + ASSERT_TRUE(man.exist(index0)); + ASSERT_EQ(man.at(index0), 100); + ASSERT_TRUE(man.exist(index1)); + ASSERT_EQ(man.at(index1), 200); + + man.remove(index0); + ASSERT_FALSE(man.exist(index0)); + ASSERT_TRUE(man.exist(index1)); + ASSERT_EQ(man.at(index1), 200); +} + +TEST(ObjectManager, push) +{ + util::ObjectManager man; + + // Not specify index + auto index = man.push(std::make_unique(100)); + ASSERT_EQ(man.at(index), 100); + + // Specify index + auto index2 = man.push(std::make_unique(200), Index{33}); + ASSERT_EQ(index2.value(), 33); + ASSERT_EQ(man.at(index2), 200); + + auto index3 = man.push(std::make_unique(300)); + // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) + ASSERT_EQ(index3.value(), 34); + ASSERT_EQ(man.at(index3), 300); + + auto index4 = man.push(std::make_unique(400), Index{22}); + ASSERT_EQ(index4.value(), 22); + ASSERT_EQ(man.at(index4), 400); + + auto index5 = man.push(std::make_unique(500)); + // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) + ASSERT_EQ(index5.value(), 35); + ASSERT_EQ(man.at(index5), 500); +} + +TEST(ObjectManager, neg_push) +{ + util::ObjectManager man; + + // Specify index + auto index = man.push(std::make_unique(100), Index{55}); + ASSERT_EQ(index.value(), 55); + ASSERT_EQ(man.at(index), 100); + + // 
Specify the same index + auto index2 = man.push(std::make_unique(200), Index{55}); + ASSERT_FALSE(index2.valid()); +} + +static const uint32_t kMaxUInt32 = std::numeric_limits::max(); + +TEST(ObjectManager, neg_push_undefined_index) +{ + util::ObjectManager man; + + // Try inserting invalid(undefined) index + auto index = man.push(std::make_unique(100), Index{kMaxUInt32}); + ASSERT_FALSE(index.valid()); + ASSERT_EQ(man.size(), 0); +} + +TEST(ObjectManager, neg_push_max_index) +{ + util::ObjectManager man; + + // Insert an object with maximum valid index + auto index = man.push(std::make_unique(100), Index{kMaxUInt32 - 1}); + ASSERT_EQ(index.value(), kMaxUInt32 - 1); + ASSERT_EQ(man.at(index), 100); + ASSERT_EQ(man.size(), 1); + + // Reached to the final index so next push/emplace must fail + auto index2 = man.push(std::make_unique(200)); + ASSERT_EQ(man.size(), 1); + ASSERT_FALSE(index2.valid()); +} + +TEST(ObjectManager, neg_emplace_max_index) +{ + util::ObjectManager man; + + // Insert an object with maximum valid index + auto index = man.push(std::make_unique(100), Index{kMaxUInt32 - 1}); + ASSERT_EQ(index.value(), kMaxUInt32 - 1); + ASSERT_EQ(man.at(index), 100); + ASSERT_EQ(man.size(), 1); + + // Reached to the final index so next push/emplace must fail + auto index3 = man.emplace(200); + ASSERT_EQ(man.size(), 1); + ASSERT_FALSE(index3.valid()); +} + +TEST(ObjectManager, const_iterate) +{ + util::ObjectManager man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + auto index2 = man.emplace(300); + + int sum = 0; + man.iterate([&](const Index &index, const int &val) { sum += val; }); + ASSERT_EQ(sum, 600); +} + +TEST(ObjectManager, non_const_iterate) +{ + util::ObjectManager man; + + auto index0 = man.emplace(100); + auto index1 = man.emplace(200); + auto index2 = man.emplace(300); + + man.iterate([&](const Index &index, int &val) { val += 1; }); + ASSERT_EQ(man.at(index0), 101); + ASSERT_EQ(man.at(index1), 201); + 
ASSERT_EQ(man.at(index2), 301); +} + +TEST(ObjectManager, set) +{ + util::ObjectManager man; + auto index = man.set(Index{1}, std::make_unique(100)); // Insert + ASSERT_EQ(index, Index{1}); + auto index2 = man.set(index, std::make_unique(200)); // Overwrite + ASSERT_EQ(index2, index); + ASSERT_EQ(man.at(index2), 200); +} + +TEST(ObjectManager, neg_set) +{ + auto v = std::make_unique(100); + util::ObjectManager man; + auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index + ASSERT_EQ(index, Index{}); + ASSERT_FALSE(index.valid()); + ASSERT_NE(v, nullptr); // v must be kept when failure +} + +TEST(ObjectManager, getRawPtr) +{ + auto v = std::make_unique(100); + auto v_ptr = v.get(); + util::ObjectManager man; + auto index = man.push(std::move(v)); + ASSERT_EQ(v_ptr, man.getRawPtr(index)); +} + +TEST(ObjectManager, neg_getRawPtr) +{ + util::ObjectManager man; + auto ptr = man.getRawPtr(Index{1}); + ASSERT_EQ(ptr, nullptr); +} diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc index 6f03cfc..4dea6d1 100644 --- a/runtime/onert/core/src/util/SNPEEventWriter.cc +++ b/runtime/onert/core/src/util/SNPEEventWriter.cc @@ -14,11 +14,12 @@ * limitations under the License. */ -#include "util/EventWriter.h" +#include "EventWriter.h" -#include #include + #include +#include #include /** diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc new file mode 100644 index 0000000..96579bf --- /dev/null +++ b/runtime/onert/core/src/util/ShapeInference.test.cc @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/ShapeInference.h" + +#include + +using namespace onert::ir; + +TEST(ShapeInference, Elementwise) +{ + Shape lhs_shape{1, 299, 299, 3}; + Shape rhs_shape{3}; + auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.dim(0), 1); + ASSERT_EQ(infered_out_shape.dim(1), 299); + ASSERT_EQ(infered_out_shape.dim(2), 299); + ASSERT_EQ(infered_out_shape.dim(3), 3); +} + +TEST(ShapeInference, neg_Elementwise) +{ + Shape lhs_shape{1, 299, 299, 3}; + Shape rhs_shape{5, 3}; + ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error); +} + +TEST(ShapeInference, Pool2DNodeSame) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{3, 7}; + Padding padding{PaddingType::SAME}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + 
ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, Pool2DNodeValid) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{3, 7}; + Padding padding{PaddingType::VALID}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, Pool2DNodeExplicit) +{ + Shape in_shape{10, 3, 5, 20}; + + Stride stride{3, 7}; + Padding padding{4, 3, 2, 1}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); + + operation::Pool2D::Param max_pool_param{ + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); +} + +TEST(ShapeInference, neg_Pool2DNode_InvalidStride) +{ + Shape in_shape{10, 6, 12, 20}; + Stride stride{0, 7}; + Padding padding{PaddingType::SAME}; + + operation::Pool2D::Param avg_pool_param{ + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param), + std::runtime_error); +} + +TEST(ShapeInference, Conv2D) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{30, 3, 6, 20}; + + operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE, + Dilation{1, 1}}; + auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); + + param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE, + Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); + + param = + operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); +} + +TEST(ShapeInference, neg_Conv2D_InvalidStride) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{30, 3, 6, 20}; + + operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE, + Dilation{1, 1}}; + ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param), + std::runtime_error); +} + +TEST(ShapeInference, DepthwiseConv2D) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{1, 3, 6, 60}; + + operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3, + Activation::NONE, Dilation{1, 1}}; + auto infered_out_shape = + onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); + + param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3, + Activation::NONE, Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); + 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); + + param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE, + Dilation{1, 1}}; + infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + + ASSERT_EQ(infered_out_shape.rank(), 4); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); + ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); +} + +TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride) +{ + Shape in_shape{10, 6, 12, 20}; + Shape ker_shape{1, 3, 6, 60}; + + operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3, + Activation::NONE, Dilation{1, 1}}; + ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param), + std::runtime_error); +} + +TEST(ShapeInference, Concat) +{ + { + Shape in1{10, 20, 30, 3, 50}; + Shape in2{10, 20, 30, 2, 50}; + Shape in3{10, 20, 30, 2, 50}; + + operation::Concat::Param param{3}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param); + + ASSERT_EQ(infered_out_shape.rank(), 5); + ASSERT_EQ(infered_out_shape.dim(0), 10); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 30); + ASSERT_EQ(infered_out_shape.dim(3), 7); + ASSERT_EQ(infered_out_shape.dim(4), 50); + } + { + // case 1. when axis < 0 + Shape in1{10, 20, 2}; + Shape in2{10, 20, 3}; + + operation::Concat::Param param{-1}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); + + ASSERT_EQ(infered_out_shape.rank(), 3); + ASSERT_EQ(infered_out_shape.dim(0), 10); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 5); + } + { + // case 2. 
when axis < 0 + Shape in1{2, 20, 2}; + Shape in2{3, 20, 2}; + + operation::Concat::Param param{-3}; + auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); + + ASSERT_EQ(infered_out_shape.rank(), 3); + ASSERT_EQ(infered_out_shape.dim(0), 5); + ASSERT_EQ(infered_out_shape.dim(1), 20); + ASSERT_EQ(infered_out_shape.dim(2), 2); + } +} + +TEST(ShapeInference, neg_Concat) +{ + { + operation::Concat::Param param{2}; + Shape in1{10, 1, 3}; + Shape in2{10, 2, 4}; // dim[1] should be 1 but 2 + + EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); + } + { // wrong rank + operation::Concat::Param param{2}; + Shape in1{10, 2, 3, 4}; + Shape in2{10, 2, 4}; // rank should be 4 + + EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); + } +} + +TEST(ShapeInference, ExpandDims) +{ + Shape in_shape{30, 40}; + + auto check = [&](int32_t axis, Shape &expected) { + auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis); + + ASSERT_EQ(actual.rank(), 3); + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { // boundary + int32_t axis = 0; + Shape expected{1, 30, 40}; + check(axis, expected); + } + { // boundary + int32_t axis = 2; + Shape expected{30, 40, 1}; + check(axis, expected); + } + { // inside + int32_t axis = 1; + Shape expected{30, 1, 40}; + check(axis, expected); + } + { // negative boundary + int32_t axis = -1; + Shape expected{30, 40, 1}; + check(axis, expected); + } + { // negative boundary + int32_t axis = -3; + Shape expected{1, 30, 40}; + check(axis, expected); + } +} + +TEST(ShapeInference, neg_ExpandDims) +{ + Shape in_shape{30, 40}; + + { // over boundary + int32_t axis = 3; + ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error); + } + { // over boundary + int32_t axis = -4; + ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), 
std::runtime_error); + } +} + +TEST(ShapeInference, FullyConnected) +{ + Shape in_shape{3, 4, 5, 6}; + Shape ker_shape{3, 10}; + auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape); + + ASSERT_EQ(infered_out_shape.rank(), 2); + ASSERT_EQ(infered_out_shape.dim(0), 36); + ASSERT_EQ(infered_out_shape.dim(1), 3); +} + +TEST(ShapeInference, Transpose) +{ + auto check = [&](Shape &in_shape, std::vector perm, Shape &expected) { + // pre-conditions + ASSERT_EQ(in_shape.rank(), perm.size()); + ASSERT_EQ(expected.rank(), perm.size()); + auto inferred_out_shape = + onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()); + // post-conditions + ASSERT_EQ(inferred_out_shape.rank(), perm.size()); + for (int32_t dim = 0; dim < expected.rank(); dim++) + { + ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim)); + } + }; + // check for 2-D + { + Shape in_shape{2, 3}; + std::vector perm = {1, 0}; + Shape expected{3, 2}; + // int32_t rank = 2; + check(in_shape, perm, expected); + } + // check for 3-D + { + Shape in_shape{1, 2, 3}; + std::vector perm = {2, 0, 1}; + Shape expected{3, 1, 2}; + // int32_t rank = 3; + check(in_shape, perm, expected); + } + // check for 4-D + { + Shape in_shape{1, 2, 3, 4}; + std::vector perm = {1, 3, 0, 2}; + Shape expected{2, 4, 1, 3}; + // int32_t rank = 4; + check(in_shape, perm, expected); + } +} + +TEST(ShapeInference, neg_Transpose) +{ + Shape in_shape{1, 2, 3}; + // Invalid parameter size + { + std::vector perm = {2, 0, 1, 0}; + // int32_t rank = 3; + ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), + std::runtime_error); + } + // Invalid parameter value + { + std::vector perm = {2, 0, 3}; + // int32_t rank = 3; + ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), + std::runtime_error); + } +} + +TEST(ShapeInference, Gather) +{ + auto check = [&](Shape &input, Shape &indices, Shape &expected, 
int32_t axis) { + int rank = input.rank(); + auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank); + + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + // check for 2-D, 3-D, axis 0 + { + Shape input{3, 4}; + Shape indices{1, 1, 2}; + int32_t axis = 0; + Shape expected{1, 1, 2, 4}; + check(input, indices, expected, axis); + } + + // check for 2-D, 3-D, axis 1 + { + Shape input{3, 4}; + Shape indices{1, 2, 1}; + int32_t axis = 1; + Shape expected{3, 1, 2, 1}; + check(input, indices, expected, axis); + } + + // check for 3-D, 2-D, axis 0 + { + Shape input{2, 3, 4}; + Shape indices{1, 2}; + int32_t axis = 0; + Shape expected{1, 2, 3, 4}; + check(input, indices, expected, axis); + } + + // check for 3-D, 2-D, axis 2 + { + Shape input{2, 3, 4}; + Shape indices{2, 1}; + int32_t axis = 2; + Shape expected{2, 3, 2, 1}; + check(input, indices, expected, axis); + } + + // check for 4D, axis 0 + { + Shape input{1, 2, 3, 4}; + Shape indices{2}; + int32_t axis = 0; + Shape expected{2, 2, 3, 4}; + check(input, indices, expected, axis); + } +} + +TEST(ShapeInference, BCQFullyConnected) +{ + auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector cluster, + Shape &expected) { + auto actual = + onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data()); + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { + Shape in_shape{10, 1}; + Shape cluster_shape{3, 2}; + std::vector cluster = {1, 10, 2, 10, 3, 10}; + + Shape expected{30, 1}; + check(in_shape, cluster_shape, cluster, expected); + } + + { + Shape in_shape{1, 1}; + Shape cluster_shape{1, 2}; + std::vector cluster = {3, 50}; + + Shape expected{50, 1}; + check(in_shape, cluster_shape, cluster, expected); + } +} + +TEST(ShapeInference, 
BCQGather) +{ + auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector cluster, + uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) { + operation::BCQGather::Param param{hidden_size, axis}; + auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape, + cluster.data(), rank, param); + ASSERT_EQ(actual.rank(), expected.rank()); + + for (int32_t dim = 0; dim < expected.rank(); dim++) + ASSERT_EQ(actual.dim(dim), expected.dim(dim)); + }; + + { + Shape indices_shape{5, 1}; + Shape cluster_shape{3, 2}; + std::vector cluster = {1, 10, 2, 10, 3, 10}; + uint32_t hidden_size = 10; + uint32_t axis = 0; + int rank = 2; + + Shape expected{5, 1, 10}; + check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected); + } + + { + Shape indices_shape{5, 1}; + Shape cluster_shape{3, 2}; + std::vector cluster = {1, 10, 2, 10, 3, 10}; + uint32_t hidden_size = 10; + uint32_t axis = 1; + int rank = 2; + + Shape expected{30, 5, 1}; + check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected); + } +} diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index 5649f28..cf080ab 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -65,10 +65,10 @@ public: /** * @brief Construct a new Loader object * - * @param graph reference on subgraphs + * @param model reference to model */ - explicit BaseLoader(std::unique_ptr &subgs) - : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}, + explicit BaseLoader(std::unique_ptr &model) + : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}, _tensor_names(std::make_shared>()) { _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA); @@ -114,7 +114,7 @@ protected: // Get BuiltinOperator BuiltinOperator 
getBuiltinOperator(const Operator *op) { - auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index()); + auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index()); auto builtin_op = builtin_opcode->builtin_code(); if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) builtin_op = static_cast(builtin_opcode->deprecated_builtin_code()); @@ -176,7 +176,7 @@ private: void verifySubgraphIndex(int subg_index) { - const auto num_subgraphs = _model->subgraphs()->size(); + const auto num_subgraphs = _domain_model->subgraphs()->size(); if (subg_index < 0 || subg_index >= static_cast(num_subgraphs)) throw std::runtime_error{std::string{"Invalid subgraph index - "} + std::to_string(subg_index)}; @@ -189,9 +189,9 @@ protected: int32_t _pagesize; // loaded file description int _fd; - // Reference on loadable subgraphs - std::unique_ptr &_subgraphs; - const Model *_model; + // Reference to ir::model (to be loaded from _domain_model) + std::unique_ptr &_model; + const Model *_domain_model; // Maps Tensor indices to onert Operands. std::vector _tensor_to_operand; std::shared_ptr> _tensor_names; @@ -290,6 +290,8 @@ ir::DataType BaseLoader::BaseLoader::tensorTypeToDataType(const Te case TensorType::TensorType_INT8: return ir::DataType::QUANT_INT8_ASYMM; // case TensorType::TensorType_FLOAT64 + case TensorType::TensorType_UINT32: + return ir::DataType::UINT32; default: throw std::runtime_error( std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); @@ -358,7 +360,7 @@ ir::OperandIndex BaseLoader::loadOperand(const Tensor *tensor, ir: const auto operand_index = subg.addOperand(shape, type_info); // Constant tensors are indicated by non-empty data. 
- const auto *data = _model->buffers()->Get(tensor->buffer())->data(); + const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data(); if (data != nullptr) { using std::ptrdiff_t; @@ -1037,7 +1039,7 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS && "Unsupported custom operation options format"); - auto *op_code = _model->operator_codes()->Get(op->opcode_index()); + auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index()); auto custom_op_name = op_code->custom_code()->str(); enum class BuiltinOP @@ -1670,7 +1672,7 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg template void BaseLoader::loadModel() { LoaderDomain::VerifyModelBuffer(*_verifier.get()); - _model = LoaderDomain::GetModel(_base); + _domain_model = LoaderDomain::GetModel(_base); // Version unused // const auto version = _model->version(); // Description unused @@ -1678,14 +1680,14 @@ template void BaseLoader::loadModel() // Metabuffer unsued // const auto *metadata_buffer = _model->metadata_buffer(); // Load subgraphs and map operations on subgraph - const auto domain_subgraphs = _model->subgraphs(); - auto subgraphs = std::make_unique(); - for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index) + const auto subgraphs = _domain_model->subgraphs(); + auto model = std::make_unique(); + for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index) { - auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]); - subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg)); + auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]); + model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg)); } - _subgraphs = std::move(subgraphs); + _model = std::move(model); } } // namespace base_loader diff --git 
a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h index 44bf280..87e5d70 100644 --- a/runtime/onert/frontend/circle/include/circle_loader.h +++ b/runtime/onert/frontend/circle/include/circle_loader.h @@ -25,8 +25,8 @@ namespace onert { namespace circle_loader { -std::unique_ptr loadModel(const std::string &filename); -std::unique_ptr loadModel(uint8_t *buffer, size_t size); +std::unique_ptr loadModel(const std::string &filename); +std::unique_ptr loadModel(uint8_t *buffer, size_t size); } // namespace circle_loader } // namespace onert diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index aae831d..5abcc9c 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -228,20 +228,20 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg) } // namespace -std::unique_ptr loadModel(const std::string &filename) +std::unique_ptr loadModel(const std::string &filename) { - auto subgraphs = std::make_unique(); - CircleLoader loader(subgraphs); + auto model = std::make_unique(); + CircleLoader loader(model); loader.loadFromFile(filename); - return subgraphs; + return model; } -std::unique_ptr loadModel(uint8_t *buffer, size_t size) +std::unique_ptr loadModel(uint8_t *buffer, size_t size) { - auto subgraphs = std::make_unique(); - CircleLoader loader(subgraphs); + auto model = std::make_unique(); + CircleLoader loader(model); loader.loadFromBuffer(buffer, size); - return subgraphs; + return model; } } // namespace circle_loader diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc index 56ca5ef..19636a8 100644 --- a/runtime/onert/frontend/nnapi/execution.cc +++ b/runtime/onert/frontend/nnapi/execution.cc @@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation, return 
ANEURALNETWORKS_UNEXPECTED_NULL; } - std::shared_ptr executors; + std::shared_ptr executors; compilation->publish(executors); diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc index 63036a3..bb247b9 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -18,11 +18,12 @@ #include "util/logging.h" +using namespace onert; + // TODO Support multiple subgraphs ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept - : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique( - _subgraphs.get())}, - _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}} + : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()}, + _compiler{std::make_shared(_model, *_coptions)} { if (model->allowedToFp16()) { @@ -34,7 +35,7 @@ bool ANeuralNetworksCompilation::finish() noexcept { try { - _executors = _compiler->compile(); + _artifact = _compiler->compile(); } catch (const std::exception &e) { diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h index bd61f9d..dff5c6d 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -21,8 +21,8 @@ #include "compiler/Compiler.h" #include "ir/Graph.h" -#include "ir/Subgraphs.h" -#include "exec/IExecutor.h" +#include "ir/Model.h" +#include "exec/Executors.h" #include "util/TracingCtx.h" struct ANeuralNetworksCompilation @@ -34,23 +34,16 @@ public: bool finish() noexcept; onert::compiler::State state(void) noexcept { return _compiler->state(); } - void publish(std::shared_ptr &executors) noexcept + void publish(std::shared_ptr &executors) noexcept { - executors = 
_executors; + executors = _artifact ? _artifact->_executors : nullptr; } private: - std::shared_ptr _subgraphs; - // TODO Refine the ownership of TracingCtx - // In case of nnfw API, nnfw_session has ownership of TracingCtx. - // In case of nnapi, there is no concept of session and primary model might have the ownership - // of TracingCtx. - // Since we don't support multiple models yet with nnapi in ONE, let's implement this later - // and let's make it work with one model for now. - std::unique_ptr _tracing_ctx; - + std::shared_ptr _model; + std::unique_ptr _coptions; std::shared_ptr _compiler; - std::shared_ptr _executors; + std::shared_ptr _artifact; }; #endif diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h index 70c5d2a..110c7cd 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -26,7 +26,7 @@ struct ANeuralNetworksExecution { public: - ANeuralNetworksExecution(const std::shared_ptr &executors) + ANeuralNetworksExecution(const std::shared_ptr &executors) : _execution{std::make_shared(executors)} { // DO NOTHING diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc index 81ffa26..a641368 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -273,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void) }); } -std::shared_ptr ANeuralNetworksModel::getSubGraphs() const +std::shared_ptr ANeuralNetworksModel::getModel() const { - auto all_subgs = std::make_shared(); + auto model = std::make_shared(); - all_subgs->push(onert::ir::SubgraphIndex{0}, _graph); + model->push(onert::ir::SubgraphIndex{0}, _graph); // TODO Find all child subgraphs and copy them to all_subgs // Must find the same subgraph 
by using to compare pointer of subgraphs and set subgraph's index // to operands of control flow operations // Must clean all child subgraphs's pointer to prevent memory leak in case of that graph has // subgraph itself recursively - return all_subgs; + return model; } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h index 4301193..04f4cf0 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h @@ -22,7 +22,7 @@ #include #include "ir/Graph.h" -#include "ir/Subgraphs.h" +#include "ir/Model.h" struct ANeuralNetworksModel { @@ -59,7 +59,7 @@ public: size_t operandSize(uint32_t index) noexcept; bool isUsageSet(uint32_t index) noexcept; bool isOperationOutput(uint32_t index) noexcept; - std::shared_ptr getSubGraphs() const; + std::shared_ptr getModel() const; private: void setOptionalOperand(const onert::ir::OperandIndex idx); diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h index dda34cc..cf17863 100644 --- a/runtime/onert/frontend/tflite/include/tflite_loader.h +++ b/runtime/onert/frontend/tflite/include/tflite_loader.h @@ -26,7 +26,7 @@ namespace onert namespace tflite_loader { -std::unique_ptr loadModel(const std::string &filename); +std::unique_ptr loadModel(const std::string &filename); } // namespace tflite_loader } // namespace onert diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc index 3b16047..fe69e4e 100644 --- a/runtime/onert/frontend/tflite/src/tflite_loader.cc +++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc @@ -154,12 +154,12 @@ void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg) } // namespace -std::unique_ptr loadModel(const std::string &filename) +std::unique_ptr loadModel(const std::string &filename) { - auto 
subgraphs = std::make_unique(); - TFLiteLoader loader(subgraphs); + auto model = std::make_unique(); + TFLiteLoader loader(model); loader.loadFromFile(filename); - return subgraphs; + return model; } } // namespace tflite_loader diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt index 7a0df4e..8d9063f 100644 --- a/runtime/onert/frontend/trix/CMakeLists.txt +++ b/runtime/onert/frontend/trix/CMakeLists.txt @@ -2,7 +2,7 @@ if (NOT BUILD_TRIX_LOADER) return() endif () -nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +nnfw_find_package(TRIXEngine QUIET 2.5.0) if(TRIXEngine_FOUND) list(APPEND SOURCES src/trix_loader.cc) else() diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h index 297d5ec..26d6a3c 100644 --- a/runtime/onert/frontend/trix/include/trix_loader.h +++ b/runtime/onert/frontend/trix/include/trix_loader.h @@ -27,7 +27,7 @@ namespace trix_loader /** * @throw runtime_error when tvn path is wrong or tvn is invalid */ -std::unique_ptr loadModel(const std::string &filename); +std::unique_ptr loadModel(const std::string &filename); } // namespace trix_loader } // namespace onert diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc index e2995bb..cdf2396 100644 --- a/runtime/onert/frontend/trix/src/trix_loader.cc +++ b/runtime/onert/frontend/trix/src/trix_loader.cc @@ -67,11 +67,11 @@ void TrixMetaReader::init(const char *path) _meta = getNPUmodel_metadata(path, false); if (_meta == nullptr) { - throw std::runtime_error("Failed to get TRIV2 model metadata"); + throw std::runtime_error("Failed to get TRIX model metadata"); } if (NPUBIN_VERSION(_meta->magiccode) != 3) { - throw std::runtime_error("TRIV2 model metadata version mismatched."); + throw std::runtime_error("TRIX model metadata version mismatched."); } } @@ -81,9 +81,9 @@ public: /** * @brief Construct a new Loader object * - * @param graph 
reference on subgraphs + * @param model reference on model */ - explicit TrixLoader(std::unique_ptr &subgs) : _subgraphs(subgs) {} + explicit TrixLoader(std::unique_ptr &model) : _model(model) {} /** * @brief Load a model from file @@ -97,7 +97,6 @@ private: * @throw runtime_error when tvn path is wrong or tvn is invalid */ void loadModel(); - void loadSubgraphs(); std::unique_ptr loadSubgraph(); void loadOperands(ir::Graph &subg); ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg); @@ -112,8 +111,11 @@ private: protected: /** path to model (e.g. tvn) */ std::string _model_path; + /** original IO shapes */ + std::vector _origin_input_shapes; + std::vector _origin_output_shapes; /** Reference on loadable subgraphs */ - std::unique_ptr &_subgraphs; + std::unique_ptr &_model; TrixMetaReader _meta; }; @@ -154,6 +156,8 @@ void TrixLoader::loadBulk(ir::Graph &subg) { ir::operation::Bulk::Param param; param.binary_path = _model_path; + param.origin_input_shapes = _origin_input_shapes; + param.origin_output_shapes = _origin_output_shapes; ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; @@ -175,6 +179,7 @@ ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg) ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)), _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx)); + _origin_input_shapes.push_back(shape); // Create operand const auto operand_index = subg.addOperand(shape, type_info); return operand_index; @@ -191,6 +196,7 @@ ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)), _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx)); + _origin_output_shapes.push_back(shape); // Create operand const auto operand_index = subg.addOperand(shape, type_info); return operand_index; @@ -237,15 +243,13 @@ std::unique_ptr TrixLoader::loadSubgraph() return subg; } -void 
TrixLoader::loadSubgraphs() +void TrixLoader::loadModel() { // one subgraph only auto subg = loadSubgraph(); - _subgraphs->push(ir::SubgraphIndex(0), std::move(subg)); + _model->push(ir::SubgraphIndex(0), std::move(subg)); } -void TrixLoader::loadModel() { loadSubgraphs(); } - void TrixLoader::loadFromFile(const std::string &file_path) { // model path will be used to set Bulk param @@ -255,12 +259,12 @@ void TrixLoader::loadFromFile(const std::string &file_path) loadModel(); } -std::unique_ptr loadModel(const std::string &filename) +std::unique_ptr loadModel(const std::string &filename) { - auto subgraphs = std::make_unique(); - TrixLoader loader(subgraphs); + auto model = std::make_unique(); + TrixLoader loader(model); loader.loadFromFile(filename); - return subgraphs; + return model; } } // namespace trix_loader } // namespace onert diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc index 9fc8e1f..eecbd22 100644 --- a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc +++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc @@ -22,10 +22,10 @@ namespace onert { namespace trix_loader { -std::unique_ptr loadModel(const std::string &) +std::unique_ptr loadModel(const std::string &) { - auto subgraphs = std::make_unique(); - return subgraphs; + auto model = std::make_unique(); + return model; } } // namespace trix_loader } // namespace onert diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt deleted file mode 100644 index 3889997..0000000 --- a/runtime/onert/test/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -set(TEST_ONERT test_onert) - -file(GLOB_RECURSE TESTS "*.cc") - -add_executable(${TEST_ONERT} ${TESTS}) - -target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src) - -target_link_libraries(${TEST_ONERT} onert_core) -target_link_libraries(${TEST_ONERT} gtest) -target_link_libraries(${TEST_ONERT} gtest_main) 
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl) -add_test(${TEST_ONERT} ${TEST_ONERT}) - -install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone) diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc deleted file mode 100644 index 514c014..0000000 --- a/runtime/onert/test/core/compiler/HEScheduler.cc +++ /dev/null @@ -1,573 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include - -namespace -{ -using namespace onert; -using namespace ir; -using namespace backend; -using namespace operation; -using namespace exec; - -// -// Mock backends classes -// - -struct MockConfigCPU : public IConfig -{ - std::string id() override { return "cpu"; } - bool initialize() override { return true; }; - bool supportPermutation() override { return false; } - Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; } - bool supportDynamicTensor() override { return false; } - bool supportFP16() override { return false; } -}; - -class MockBackendContext : public BackendContext -{ -public: - using BackendContext::BackendContext; - ITensorRegistry *genTensors() override { return nullptr; } - FunctionMap genKernels() override { return {}; } -}; - -struct MockBackendCPU : public Backend -{ - std::shared_ptr config() const override { return std::make_shared(); } - std::unique_ptr newContext(ContextData &&data) const override - { - return std::make_unique(this, std::move(data), nullptr); - } -}; - -struct MockConfigGPU : public IConfig -{ - std::string id() override { return "gpu"; } - bool initialize() override { return true; }; - bool supportPermutation() override { return false; } - ir::Layout supportLayout(const ir::Operation &, ir::Layout) override - { - return ir::Layout::UNKNOWN; - } - bool supportDynamicTensor() override { return false; } - bool supportFP16() override { return false; } -}; - -struct MockBackendGPU : public Backend -{ - std::shared_ptr config() const override { return std::make_shared(); } - std::unique_ptr newContext(ContextData &&data) const override - { - return std::make_unique(this, std::move(data), nullptr); - } -}; - -struct MockConfigNPU : public IConfig -{ - std::string id() override { return "npu"; } - bool initialize() override { return true; }; - bool supportPermutation() override { return false; } - 
ir::Layout supportLayout(const ir::Operation &, ir::Layout) override - { - return ir::Layout::UNKNOWN; - } - bool supportDynamicTensor() override { return false; } - bool supportFP16() override { return false; } -}; - -struct MockBackendNPU : public Backend -{ - std::shared_ptr config() const override { return std::make_shared(); } - std::unique_ptr newContext(ContextData &&data) const override - { - return std::make_unique(this, std::move(data), nullptr); - } -}; - -// -// Constants -// - -const int OPERAND_ELEMS = 268203; -const int OPERAND_SIZE = OPERAND_ELEMS * 4; -const int OPERATION_SIZE = OPERAND_SIZE * 3; - -const std::string LINEAR("Linear"); -const std::string DATAFLOW("Dataflow"); -const std::string PARALLEL("Parallel"); - -// -// Helper functions -// - -// Set executor through environment variable -void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); } - -// Set profiling mode through environment variable -void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); } - -// Calculate operation size by addition sizes of all input and output operands -uint32_t calcOpSize(const std::shared_ptr &graph, const OperationIndex &op_idx) -{ - uint32_t size = 0; - const auto &op = graph->operations().at(op_idx); - for (const auto &ind : op.getInputs() + op.getOutputs()) - size += graph->operands().at(ind).info().total_size(); - return size; -} - -// Set execution operation time. This method is needed since ExecutionTime has only -// 'updateOperationExecTime' method. -void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation, - bool quant, uint32_t op_size, int64_t time) -{ - // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it - assert(time > 0); - int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size); - int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? 
time : 2 * time - prev_time; - et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set); - assert(et.getOperationExecTime(backend, operation, quant, op_size) == time); -} - -// Set same execution time for all given backends/operations -void setOperationsExecutionTime(const std::vector &backends, - const std::vector &op_names, - const std::vector &op_sizes, int64_t exec_time) -{ - assert(op_names.size() == op_sizes.size()); - ExecTime et(backends); - for (int i = 0; i < op_names.size(); ++i) - { - for (auto &backend : backends) - setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); - } - et.storeOperationsExecTime(); -} - -// Set permute time from one backend to another. This method is needed since ExecutionTime has only -// 'updatePermuteTime' method. -void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend, - bool quant, uint32_t op_size, int64_t time) -{ - // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it - assert(time > 0); - int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size); - int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? 
time : 2 * time - prev_time; - et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set); - assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time); -} - -// Set same permutation time between all given backends -void setPermutationsExecutionTime(const std::vector &backends, - const int operand_size, const int64_t exec_time) -{ - ExecTime et(backends); - for (const auto &backend : backends) - { - for (auto &other_backend : backends) - { - if (backend == other_backend) - continue; - setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); - } - } - et.storeOperationsExecTime(); -} - -// -// Functions for creating graphs -// - -using OIS = OperandIndexSequence; - -template -OperationIndex create(std::shared_ptr graph, Types &&... args) -{ - auto op = std::make_unique(std::forward(args)...); - auto op_idx = graph->addOperation(std::move(op)); - // For now in scheduler test all operations in tested graphs has same size (for simplicity) - assert(calcOpSize(graph, op_idx) == OPERATION_SIZE); - return op_idx; -} - -// Create straight graph: Add->Sub->Mul -std::shared_ptr createStraightGraph() -{ - auto graph = std::make_shared(); - const TypeInfo float_op(DataType::FLOAT32); - - // Create add node - auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE}; - create(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params); - - // Create sub node - auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE}; - create(graph, OIS{add_out_idx, 
sub_const_idx}, OIS{sub_out_idx}, sub_op_params); - - // Create mul node - auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; - create(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params); - - graph->verify(); - return graph; -} - -/* Create branched graph: - * [Add] - * // \\ - * [Mul1] [FC2] - * || || - * [Mul2] [FC2] - * \\ // - * [Sub] - */ -std::shared_ptr createBranchedGraph() -{ - auto graph = std::make_shared(); - const TypeInfo float_op(DataType::FLOAT32); - - // Create add node - auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE}; - create(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params); - - // Create mul1 node - auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; - create(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx}, - mul1_op_params); - - // Create mul2 node - auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE}; - create(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx}, - mul2_op_params); - - // Create fc1 node - auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto fc1_out_idx = 
graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - FullyConnected::Param fc1_op_params{Activation::NONE}; - create(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params); - - // Create fc2 node - auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - FullyConnected::Param fc2_op_params{Activation::NONE}; - create(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params); - - // Create sub node - auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op); - BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE}; - create(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params); - - graph->verify(); - return graph; -} - -// -// Tests setup/teardown -// - -// SetUp/TearDown methods runs before/after each test and performs actions common for each test -class HESchedulerTest : public ::testing::Test -{ -protected: - void SetUp() override - { - // Initialize mock backends - _cpu_backend = new MockBackendCPU(); - _gpu_backend = new MockBackendGPU(); - _npu_backend = new MockBackendNPU(); - _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend}; - - // Remove previous profile data if it exists - if (!remove("exec_time.json")) - { - // DO NOTHING (no profile data) - } - - // Remember original value of 'EXECUTOR' environment variable - char *executor = std::getenv("EXECUTOR"); - _original_executor = executor == nullptr ? "" : executor; - - // Remember original value of 'PROFILING_MODE' environment variable - char *profiling_mode = std::getenv("PROFILING_MODE"); - _original_profiling_mode = profiling_mode == nullptr ? 
"" : profiling_mode; - } - - void TearDown() override - { - delete _cpu_backend; - delete _gpu_backend; - delete _npu_backend; - EXPECT_EQ(remove("exec_time.json"), 0); - setenv("EXECUTOR", _original_executor.c_str(), true); - setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true); - } - - const MockBackendCPU *_cpu_backend{nullptr}; - const MockBackendGPU *_gpu_backend{nullptr}; - const MockBackendNPU *_npu_backend{nullptr}; - std::vector _mock_backends; - - std::string _original_executor; - std::string _original_profiling_mode; -}; - -// -// HEScheduler tests -// - -class HESchedulerTestWithExecutorParam : public HESchedulerTest, - public testing::WithParamInterface -{ -}; - -// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - -// one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, - testing::Values(LINEAR, DATAFLOW, PARALLEL)); - -// Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
-TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) -{ - setExecutor(GetParam()); - - // Prepare graph - ir::Subgraphs subgs; - auto graph(createStraightGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); - OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2); - - // Set default execution and transfer time - setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1); - setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"}, - {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); - - // Test 1 - // Expected behaviour: scheduler assigns different backend to each node - { - // For each backend reduce execution time of one node - ExecTime et(_mock_backends); - setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); - setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); - setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); - et.storeOperationsExecTime(); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); - ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu"); - ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu"); - } - - // Test 2 - // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time - { - // Increase transfer time - setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu"); - ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); - ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu"); - } -} - -// Test scheduler behavior for branched graph with known 
execution time of all nodes and permutes -TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) -{ - const int64_t NPU_ET = 5000; - setExecutor(GetParam()); - - // Prepare graph - ir::Subgraphs subgs; - auto graph(createBranchedGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); - OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); - - // Set default execution and transfer time - setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); - setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"}, - {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4); - - // Test 1 - // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all - // nodes, in case of parallel executor scheduler assigns different backends to branches. - { - // Reduce execution time - ExecTime et(_mock_backends); - setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET); - setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET); - setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET); - setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); - setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); - setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); - et.storeOperationsExecTime(); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - - std::string branch1_expected_backend("npu"), branch2_expected_backend("npu"); - if (GetParam() == PARALLEL) - { - branch1_expected_backend = - br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; - branch2_expected_backend = branch1_expected_backend == "npu" ? 
"gpu" : "npu"; - } - - ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend); - ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend); - ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend); - ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend); - ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); - } - - // Test 2 - // Expected behaviour: scheduler assigns single backend to all nodes - { - // Increase execution time for GPU backend - ExecTime et(_mock_backends); - /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt * - * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the - * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter - * branching or scheduler assigns another backend to a node*/ - setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); - setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); - et.storeOperationsExecTime(); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu"); - } -} - -// Test scheduler behavior for branched graph and enabled profiling mode -TEST_F(HESchedulerTest, branched_graph_profiling_mode) -{ - const int ET = 1e5; - - // Turn on profiling mode - 
setProfilingMode(true); - setExecutor(DATAFLOW); - - // Prepare graph - ir::Subgraphs subgs; - auto graph(createBranchedGraph()); - subgs.push(ir::SubgraphIndex{0}, graph); - OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); - - // Test 1 - // Expected behaviour: scheduler assigns backends to nodes with unknown execution time - { - // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC - ExecTime et(_mock_backends); - setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); - setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); - setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); - et.storeOperationsExecTime(); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu"); - ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu"); - ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu"); - ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu"); - } - - // Test 2 - // Expected behaviour: scheduler shuffling backends, so different backends are assigned to - // neighbor nodes - { - // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC) - ExecTime et(_mock_backends); - 
setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); - setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); - setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - et.storeOperationsExecTime(); - - // Test scheduler - auto scheduler = - compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs)); - const auto br = scheduler.schedule(*graph); - ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), - br->getBackend(mul1_op_idx)->config()->id()); - ASSERT_NE(br->getBackend(add_op_idx)->config()->id(), - br->getBackend(fc1_op_idx)->config()->id()); - ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(), - br->getBackend(mul2_op_idx)->config()->id()); - ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(), - br->getBackend(fc2_op_idx)->config()->id()); - ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(), - br->getBackend(sub_op_idx)->config()->id()); - ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(), - br->getBackend(sub_op_idx)->config()->id()); - } -} - -// TODO: Add tests with unknown execution and permutation time - -} // unnamed namespace diff --git a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc deleted file mode 100644 index b18dedd..0000000 --- a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "ir/Graph.h" -#include "compiler/pass/UnusedOperandEliminationPass.h" - -using namespace onert::ir; -using namespace onert::compiler::pass; - -TEST(UnusedOperandEliminationPass, Simple) -{ - Graph graph; - - // Add tensors - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - auto in = graph.addOperand(shape, type); - auto out = graph.addOperand(shape, type); - - auto unused = graph.addOperand(shape, type); - - // Set model inputs/outputs - graph.addInput(in); - graph.addOutput(out); - - UnusedOperandEliminationPass{graph}.run(); - - ASSERT_TRUE(graph.operands().exist(in)); - ASSERT_TRUE(graph.operands().exist(out)); - ASSERT_FALSE(graph.operands().exist(unused)); -} diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc deleted file mode 100644 index 0183b62..0000000 --- a/runtime/onert/test/core/exec/ExecInstance.cc +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include "ir/Graph.h" -#include "compiler/Compiler.h" -#include "exec/Execution.h" -#include "ir/operation/BinaryArithmetic.h" -#include "util/TracingCtx.h" - -namespace -{ - -using namespace onert::ir; - -class CompiledMockUpModel -{ -public: - CompiledMockUpModel() - { - // Model: two elementwise add operation - // model input: lhs, rhs1 - // model output: second add result (result2) - // constant: rhs2 - // result1 <= (lhs + rhs) - // result2 <= (result1 + rhs2) - // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} - // activation: none (constant) - graph = std::make_shared(); - // 1st add operands (result1 <= lhs + rhs1) - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - static float rhs2_data[4] = {3, 1, -1, 5}; - auto operand_lhs = graph->addOperand(shape, type); - auto operand_rhs1 = graph->addOperand(shape, type); - auto operand_result1 = graph->addOperand(shape, type); - auto operand_rhs2 = graph->addOperand(shape, type); - auto operand_result2 = graph->addOperand(shape, type); - graph->operands() - .at(operand_rhs2) - .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); - // 2nd add operations (result2 <= result1 + rhs2) - operation::BinaryArithmetic::Param param1; - param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param1.activation = Activation::NONE; - auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; - auto output_set1 = OperandIndexSequence{operand_result1}; - graph->addOperation( - std::make_unique(input_set1, output_set1, param1)); - operation::BinaryArithmetic::Param param2; - param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param2.activation = Activation::NONE; - auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; - auto output_set2 = OperandIndexSequence{operand_result2}; - graph->addOperation( - 
std::make_unique(input_set2, output_set2, param2)); - // Identify model inputs and outputs - graph->addInput(operand_lhs); - graph->addInput(operand_rhs1); - graph->addOutput(operand_result2); - graph->verify(); - - // Compile - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, graph); - tracing_ctx = std::make_unique(subgs.get()); - onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; - executors = compiler.compile(); - } - -public: - std::shared_ptr graph; - std::shared_ptr executors; - std::unique_ptr tracing_ctx; -}; - -TEST(ExecInstance, simple) -{ - auto mockup = CompiledMockUpModel(); - auto graph = mockup.graph; - auto executors = mockup.executors; - - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto output = IOIndex{0}; - - const float input1_buffer[4] = {1, 0, -1, -2}; - const float input2_buffer[4] = {1, -3, 2, -4}; - float output_buffer[4] = {}; - const float output_expected[4] = {5, -2, 0, -1}; - - onert::exec::Execution execution{executors}; - - execution.setInput(input1, reinterpret_cast(input1_buffer), 16); - execution.setInput(input2, reinterpret_cast(input2_buffer), 16); - execution.setOutput(output, reinterpret_cast(output_buffer), 16); - execution.execute(); - - for (auto i = 0; i < 4; i++) - { - EXPECT_EQ(output_buffer[i], output_expected[i]); - } -} - -TEST(ExecInstance, twoCompile) -{ - auto mockup = CompiledMockUpModel(); - auto graph = mockup.graph; - auto executors1 = mockup.executors; - onert::exec::Execution execution1{executors1}; - - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto output = IOIndex{0}; - - const float exe1_input1_buffer[4] = {1, 0, -1, -2}; - const float exe1_input2_buffer[4] = {1, -3, 2, -4}; - float exe1_output_buffer[4] = {}; - const float exe1_output_expected[4] = {5, -2, 0, -1}; - - execution1.setInput(input1, reinterpret_cast(exe1_input1_buffer), 16); - execution1.setInput(input2, reinterpret_cast(exe1_input2_buffer), 16); - execution1.setOutput(output, 
reinterpret_cast(exe1_output_buffer), 16); - - // Make new executor: compile again - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, graph); - auto tracing_ctx = std::make_unique(subgs.get()); - onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; - std::shared_ptr executors2 = compiler.compile(); - onert::exec::Execution execution2{executors2}; - - const float exe2_input1_buffer[4] = {2, 1, -2, 0}; - const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; - float exe2_output_buffer[4] = {}; - const float exe2_output_expected[4] = {2, 5, -2, 7}; - - execution2.setInput(input1, reinterpret_cast(exe2_input1_buffer), 16); - execution2.setInput(input2, reinterpret_cast(exe2_input2_buffer), 16); - execution2.setOutput(output, reinterpret_cast(exe2_output_buffer), 16); - - execution1.execute(); - execution2.execute(); - - for (auto i = 0; i < 4; i++) - { - EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); - EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); - } -} - -// Support two initialized execution instance then ordered execution -TEST(ExecInstance, twoExecution) -{ - auto mockup = CompiledMockUpModel(); - auto executors = mockup.executors; - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto output1 = IOIndex{0}; - - const float exe1_input1_buffer[4] = {1, 0, -1, -2}; - const float exe1_input2_buffer[4] = {1, -3, 2, -4}; - float exe1_output_buffer[4] = {}; - const float exe1_output_expected[4] = {5, -2, 0, -1}; - const float exe2_output_expected[4] = {2, 5, -2, 7}; - - onert::exec::Execution execution1{executors}; - execution1.setInput(input1, reinterpret_cast(exe1_input1_buffer), 16); - execution1.setInput(input2, reinterpret_cast(exe1_input2_buffer), 16); - execution1.setOutput(output1, reinterpret_cast(exe1_output_buffer), 16); - - const float exe2_input1_buffer[4] = {2, 1, -2, 0}; - const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; - float exe2_output_buffer[4] = {}; - - // Make new execution - 
onert::exec::Execution execution2{executors}; - execution2.setInput(input1, reinterpret_cast(exe2_input1_buffer), 16); - execution2.setInput(input2, reinterpret_cast(exe2_input2_buffer), 16); - execution2.setOutput(output1, reinterpret_cast(exe2_output_buffer), 16); - - execution1.execute(); - execution2.execute(); - - for (auto i = 0; i < 4; i++) - { - EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); - EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); - } -} - -class Inference -{ -public: - Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], - std::shared_ptr &executors) - : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} - { - // DO NOTHING - } - - void inference(void) - { - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto output1 = IOIndex{0}; - - onert::exec::Execution execution{_executors}; - execution.setInput(input1, reinterpret_cast(_input1), 16); - execution.setInput(input2, reinterpret_cast(_input2), 16); - execution.setOutput(output1, reinterpret_cast(_output), 16); - - execution.execute(); - } - -private: - const float (&_input1)[4]; - const float (&_input2)[4]; - float (&_output)[4]; - std::shared_ptr &_executors; -}; - -// Support multi-thread execution -TEST(ExecInstance, twoThreads) -{ - auto mockup = CompiledMockUpModel(); - auto executors = mockup.executors; - - const float exe1_input1_buffer[4] = {1, 0, -1, -2}; - const float exe1_input2_buffer[4] = {1, -3, 2, -4}; - float exe1_output_buffer[4] = {}; - const float exe1_output_expected[4] = {5, -2, 0, -1}; - - Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors}; - - const float exe2_input1_buffer[4] = {2, 1, -2, 0}; - const float exe2_input2_buffer[4] = {-3, 3, 1, 2}; - float exe2_output_buffer[4] = {}; - const float exe2_output_expected[4] = {2, 5, -2, 7}; - - Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors}; - - std::thread 
t1{&Inference::inference, &execution1}; - std::thread t2{&Inference::inference, &execution2}; - - t1.join(); - t2.join(); - - for (auto i = 0; i < 4; i++) - { - EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]); - EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]); - } -} - -// Support asynchronous execution -TEST(ExecInstance, async) -{ - auto mockup = CompiledMockUpModel(); - auto graph = mockup.graph; - auto executors = mockup.executors; - - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto output = IOIndex{0}; - - const float input1_buffer[4] = {1, 0, -1, -2}; - const float input2_buffer[4] = {1, -3, 2, -4}; - float output_buffer[4] = {}; - const float output_expected[4] = {5, -2, 0, -1}; - - onert::exec::Execution execution{executors}; - - execution.setInput(input1, reinterpret_cast(input1_buffer), 16); - execution.setInput(input2, reinterpret_cast(input2_buffer), 16); - execution.setOutput(output, reinterpret_cast(output_buffer), 16); - execution.startExecute(); - execution.waitFinish(); - - for (auto i = 0; i < 4; i++) - { - EXPECT_EQ(output_buffer[i], output_expected[i]); - } -} - -} // namespace diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc deleted file mode 100644 index 178b61e..0000000 --- a/runtime/onert/test/core/exec/ExecTime.test.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "exec/ExecTime.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" -#include -#include - -namespace -{ -using namespace onert; -using namespace exec; -using namespace backend; - -struct MockConfig : public IConfig -{ - std::string id() override { return "b1"; } - bool initialize() override { return true; }; - bool supportPermutation() override { return false; } - ir::Layout supportLayout(const ir::Operation &, ir::Layout) override - { - return ir::Layout::UNKNOWN; - } - bool supportDynamicTensor() override { return false; } - bool supportFP16() override { return false; } -}; - -struct MockBackend : public ::onert::backend::Backend -{ - std::shared_ptr config() const override - { - return std::make_shared(); - } - std::unique_ptr newContext(ContextData &&) const override - { - return nullptr; - } -}; - -TEST(ExecTime, roundtrip_ok) -{ - const auto *b = new MockBackend(); - std::vector bs = {b}; - { - ExecTime et(bs); - et.updateOperationExecTime(b, "op1", true, 100, 100); - et.updateOperationExecTime(b, "op1", true, 200, 200); - et.updateOperationExecTime(b, "op1", false, 100, 888); - et.storeOperationsExecTime(); - } - { - ExecTime et(bs); - auto time = et.getOperationExecTime(b, "op1", true, 100); - ASSERT_EQ(time, 100); - // Check interpolation - time = et.getOperationExecTime(b, "op1", true, 150); - ASSERT_EQ(time, 150); - time = et.getOperationExecTime(b, "op1", false, 100); - ASSERT_EQ(time, 888); - et.storeOperationsExecTime(); - } - // clean up - EXPECT_EQ(remove("exec_time.json"), 0); -} - -TEST(ExecTime, structure) -{ - - const auto *b = new MockBackend(); - std::vector bs = {b}; - { - ExecTime et(bs); - et.updateOperationExecTime(b, "op1", true, 100, 100); - et.updateOperationExecTime(b, "op1", true, 200, 200); - et.storeOperationsExecTime(); - } - { - ExecTime et(bs); - auto time = et.getOperationExecTime(b, "op1", true, 
100); - ASSERT_EQ(time, 100); - // Check interpolation - time = et.getOperationExecTime(b, "op1", true, 200); - ASSERT_EQ(time, 200); - et.storeOperationsExecTime(); - } - // clean up - EXPECT_EQ(remove("exec_time.json"), 0); -} -} // unnamed namespace diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc deleted file mode 100644 index a9f7cd4..0000000 --- a/runtime/onert/test/core/interp/ExecManager.cc +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include - -#include "ir/Graph.h" -#include "interp/InterpExecutor.h" -#include "exec/Execution.h" -#include "ir/operation/BinaryArithmetic.h" - -namespace -{ - -using namespace onert::ir; -using InterpExecutor = onert::interp::InterpExecutor; -using Execution = onert::exec::Execution; -using ExecutorMap = onert::exec::ExecutorMap; - -class InterpExecutorTest : public ::testing::Test -{ -protected: - virtual void SetUp() {} - void CreateSimpleModel() - { - // Model: one elementwise add operation - // model input: lhs, rhs - // model output: add result - // lhs, rhs, result shape: {1, 2, 2, 1} - // activation: none (constant) - _graph = std::make_unique(); - - // Add operands - - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::INT32}; - Shape shape_scalar(0); - TypeInfo type_scalar{DataType::INT32}; - - auto operand_lhs = _graph->addOperand(shape, type); - auto operand_rhs = _graph->addOperand(shape, type); - auto operand_result = _graph->addOperand(shape, type); - - // Add operations - - operation::BinaryArithmetic::Param param; - param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param.activation = Activation::NONE; - auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; - auto output_set = OperandIndexSequence{operand_result}; - _graph->addOperation( - std::make_unique(input_set, output_set, param)); - - // Identify model inputs and outputs - - _graph->getInputs().append(operand_lhs); - _graph->getInputs().append(operand_rhs); - _graph->getOutputs().append(operand_result); - - _graph->verify(); - - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); - - _executors = std::make_shared(); - _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); - } - - void CreateTwoStepModel() - { - // Model: two elementwise add operation - // model input: lhs, rhs1 - // model output: second add result (result2) - // 
constant: rhs2 - // result1 <= (lhs + rhs) - // result2 <= (result1 + rhs2) - // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1} - // activation: none (constant) - _graph = std::make_unique(); - - // 1st add operands (result1 <= lhs + rhs1) - - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::INT32}; - Shape shape_scalar(0); - TypeInfo type_scalar{DataType::INT32}; - - static int32_t rhs2_data[4] = {3, 1, -1, 5}; - - auto operand_lhs = _graph->addOperand(shape, type); - auto operand_rhs1 = _graph->addOperand(shape, type); - auto operand_result1 = _graph->addOperand(shape, type); - auto operand_rhs2 = _graph->addOperand(shape, type); - auto operand_result2 = _graph->addOperand(shape, type); - _graph->operands() - .at(operand_rhs2) - .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); - - // 2nd add operations (result2 <= result1 + rhs2) - - operation::BinaryArithmetic::Param param1; - param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param1.activation = Activation::NONE; - auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; - auto output_set1 = OperandIndexSequence{operand_result1}; - _graph->addOperation( - std::make_unique(input_set1, output_set1, param1)); - - operation::BinaryArithmetic::Param param2; - param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param2.activation = Activation::NONE; - auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; - auto output_set2 = OperandIndexSequence{operand_result2}; - _graph->addOperation( - std::make_unique(input_set2, output_set2, param2)); - - // Identify model inputs and outputs - - _graph->getInputs().append(operand_lhs); - _graph->getInputs().append(operand_rhs1); - _graph->getOutputs().append(operand_result2); - - _graph->verify(); - - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); - - _executors = std::make_shared(); - _executors->insert( - 
std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); - } - - void CreateUnspecifiedDimensionsModel() - { - // Model: one elementwise add operation - // model input: lhs, rhs - // model output: add result - // lhs, rhs, result shape: {1, unknown, 2, 1} - // activation: none (constant) - _graph = std::make_unique(); - - // Add operands - - Shape shape{1, 0, 2, 1}; - TypeInfo type{DataType::INT32}; - Shape shape_scalar(0); - TypeInfo type_scalar{DataType::INT32}; - - auto operand_lhs = _graph->addOperand(shape, type); - auto operand_rhs = _graph->addOperand(shape, type); - - auto operand_activation = _graph->addOperand(shape_scalar, type_scalar); - _graph->operands() - .at(operand_activation) - .data(std::make_unique(reinterpret_cast(&_activation_value), 4)); - - auto operand_result = _graph->addOperand(shape, type); - - // Add operations - - operation::BinaryArithmetic::Param param; - param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param.activation = Activation::NONE; - auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; - auto output_set = OperandIndexSequence{operand_result}; - _graph->addOperation( - std::make_unique(input_set, output_set, param)); - - // Identify model inputs and outputs - - _graph->getInputs().append(operand_lhs); - _graph->getInputs().append(operand_rhs); - _graph->getOutputs().append(operand_result); - - _graph->verify(); - - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, _graph); - _graph->setSubgraphs(subgs); - - _executors = std::make_shared(); - _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); - } - - void createExecution() { _execution = std::make_unique(_executors); } - - virtual void TearDown() { _executors = nullptr; } - - std::shared_ptr _graph{nullptr}; - std::shared_ptr _executors{nullptr}; - std::unique_ptr _execution{nullptr}; - const int32_t _activation_value{0}; -}; - -TEST_F(InterpExecutorTest, 
create_empty) -{ - Graph graph; - graph.verify(); - auto executor = std::make_unique(graph); - ASSERT_NE(executor, nullptr); -} - -TEST_F(InterpExecutorTest, create_simple) -{ - CreateSimpleModel(); - ASSERT_NE(_executors, nullptr); - ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr); -} - -TEST_F(InterpExecutorTest, neg_setInput) -{ - CreateSimpleModel(); - createExecution(); - - auto input1 = IOIndex{0}; - const int32_t input1_buffer[4] = {1, 0, -1, -2}; - - EXPECT_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 4), - std::runtime_error); - EXPECT_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 12), - std::runtime_error); - EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); -} - -TEST_F(InterpExecutorTest, neg_setOutput) -{ - CreateSimpleModel(); - createExecution(); - - auto output = IOIndex{0}; - auto output_idx = _graph->getOutputs().at(output); - - int32_t output_buffer[4] = {}; - - EXPECT_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 4), - std::runtime_error); - EXPECT_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 12), - std::runtime_error); - EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); -} - -TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions) -{ - CreateUnspecifiedDimensionsModel(); - createExecution(); - - auto input1 = IOIndex{0}; - const int32_t input1_buffer[4] = {1, 0, -1, -2}; - - TypeInfo operand_type{DataType::INT32}; - Shape operand_shape{1, 2, 2, 1}; - - EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, - reinterpret_cast(input1_buffer), 4), - std::runtime_error); - EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape, - reinterpret_cast(input1_buffer), 12), - std::runtime_error); - EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape, - reinterpret_cast(input1_buffer), 16)); -} - 
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions) -{ - CreateUnspecifiedDimensionsModel(); - createExecution(); - - auto output = IOIndex{0}; - auto output_idx = _graph->getOutputs().at(output); - - TypeInfo operand_type{DataType::INT32}; - Shape operand_shape{1, 2, 2, 1}; - - int32_t output_buffer[4] = {}; - - EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, - reinterpret_cast(output_buffer), 4), - std::runtime_error); - EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape, - reinterpret_cast(output_buffer), 12), - std::runtime_error); - EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape, - reinterpret_cast(output_buffer), 16)); -} - -TEST_F(InterpExecutorTest, execute) -{ - CreateSimpleModel(); - createExecution(); - - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto input1_idx = _graph->getInputs().at(input1); - auto input2_idx = _graph->getInputs().at(input2); - - const int32_t input1_buffer[4] = {1, 0, -1, -2}; - const int32_t input2_buffer[4] = {1, -3, 2, -4}; - - auto output = IOIndex{0}; - auto output_idx = _graph->getOutputs().at(output); - - int32_t output_buffer[4] = {}; - - EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); - EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast(input2_buffer), 16)); - EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); - EXPECT_NO_THROW(_execution->execute()); - EXPECT_EQ(output_buffer[0], 2); - EXPECT_EQ(output_buffer[1], -3); - EXPECT_EQ(output_buffer[2], 1); - EXPECT_EQ(output_buffer[3], -6); -} - -TEST_F(InterpExecutorTest, executeTwoStep) -{ - CreateTwoStepModel(); - createExecution(); - - auto input1 = IOIndex{0}; - auto input2 = IOIndex{1}; - auto input1_idx = _graph->getInputs().at(input1); - auto input2_idx = _graph->getInputs().at(input2); - - const int32_t input1_buffer[4] = {1, 0, -1, -2}; - const int32_t input2_buffer[4] = {1, -3, 2, -4}; - - 
auto output = IOIndex{0}; - auto output_idx = _graph->getOutputs().at(output); - - int32_t output_buffer[4] = {}; - - EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast(input1_buffer), 16)); - EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast(input2_buffer), 16)); - EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast(output_buffer), 16)); - EXPECT_NO_THROW(_execution->execute()); - EXPECT_EQ(output_buffer[0], 5); - EXPECT_EQ(output_buffer[1], -2); - EXPECT_EQ(output_buffer[2], 0); - EXPECT_EQ(output_buffer[3], -1); -} - -} // namespace diff --git a/runtime/onert/test/core/ir/Graph.cc b/runtime/onert/test/core/ir/Graph.cc deleted file mode 100644 index d6de7c0..0000000 --- a/runtime/onert/test/core/ir/Graph.cc +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "ir/Graph.h" -#include "ir/operation/BinaryArithmetic.h" -#include "ir/verifier/Verifier.h" - -TEST(Graph, neg_inputs_and_outputs) -{ - onert::ir::Graph graph; - - onert::ir::OperandIndex index0{0u}; - onert::ir::OperandIndex index1{1u}; - - graph.addInput({index0}); - graph.addInput({index1}); - - onert::ir::OperandIndex index10{10u}; - onert::ir::OperandIndex index11{11u}; - onert::ir::OperandIndex index12{12u}; - - graph.addOutput({index10}); - graph.addOutput({index11}); - graph.addOutput({index12}); - - ASSERT_EQ(graph.getInputs().size(), 2); - ASSERT_EQ(graph.getOutputs().size(), 3); - - onert::ir::IOIndex io_index0{0}; - onert::ir::IOIndex io_index1{1}; - onert::ir::IOIndex io_index2{2}; - - ASSERT_EQ(graph.getInputs().at(io_index0), 0); - ASSERT_EQ(graph.getInputs().at(io_index1), 1); - - ASSERT_EQ(graph.getOutputs().at(io_index0), 10); - ASSERT_EQ(graph.getOutputs().at(io_index1), 11); - ASSERT_EQ(graph.getOutputs().at(io_index2), 12); - - EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range); -} - -using namespace onert::ir; - -OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs, - const OperandIndexSequence outputs) -{ - // Add "ADD" operation - operation::BinaryArithmetic::Param param; - param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; - param.activation = Activation::NONE; - return graph.addOperation(std::make_unique(inputs, outputs, param)); -} - -TEST(Graph, OneOpGraphSimpleValid) -{ - // Simple Graph with just one Add operation - - Graph graph; - - // Add tensors - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - auto lhs = graph.addOperand(shape, type); - auto rhs = graph.addOperand(shape, type); - auto res = graph.addOperand(shape, type); - - addAddOperation(graph, {lhs, rhs}, {res}); - - // Set model inputs/outputs - graph.addInput(lhs); - graph.addInput(rhs); - graph.addOutput(res); - - graph.verify(); - - SUCCEED(); -} - 
-TEST(Graph, neg_InvalidGraph_BadInput) -{ - Graph graph; - - // Add tensors - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - auto in = graph.addOperand(shape, type); - auto out = graph.addOperand(shape, type); - - // Set model inputs/outputs - graph.addInput(in); - graph.addOutput(out); - graph.addInput(OperandIndex{89}); // Non-exisiting operand! - - EXPECT_ANY_THROW(graph.verify()); -} - -TEST(Graph, neg_InvalidGraph_BadOutput) -{ - Graph graph; - - // Add tensors - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - auto in = graph.addOperand(shape, type); - auto out = graph.addOperand(shape, type); - - // Set model inputs/outputs - graph.addInput(in); - graph.addOutput(out); - graph.addOutput(OperandIndex{12}); // Non-exisiting operand! - - EXPECT_ANY_THROW(graph.verify()); -} - -TEST(Graph, neg_InvalidAddOperation_BadInputIndex) -{ - Graph graph; - - // Add tensors - Shape shape{1, 2, 2, 1}; - TypeInfo type{DataType::FLOAT32}; - auto lhs = graph.addOperand(shape, type); - auto rhs = graph.addOperand(shape, type); - auto res = graph.addOperand(shape, type); - - // Set model inputs/outputs - graph.addInput(lhs); - graph.addInput(rhs); - graph.addOutput(res); - - ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid()); -} diff --git a/runtime/onert/test/core/ir/LayoutSet.cc b/runtime/onert/test/core/ir/LayoutSet.cc deleted file mode 100644 index 591710a..0000000 --- a/runtime/onert/test/core/ir/LayoutSet.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "ir/LayoutSet.h" - -using onert::ir::Layout; -using onert::ir::LayoutSet; - -TEST(ir_LayoutSet, neg_add_remove) -{ - LayoutSet set{Layout::NCHW}; - set.remove(Layout::NHWC); - ASSERT_EQ(set.size(), 1); - set.add(Layout::NHWC); - ASSERT_EQ(set.size(), 2); - set.remove(Layout::NHWC); - ASSERT_EQ(set.size(), 1); - set.remove(Layout::NCHW); - ASSERT_EQ(set.size(), 0); - set.remove(Layout::NCHW); - ASSERT_EQ(set.size(), 0); -} - -TEST(ir_LayoutSet, neg_add_twice) -{ - LayoutSet set; - set.add(Layout::NHWC); - ASSERT_EQ(set.size(), 1); - set.add(Layout::NHWC); - ASSERT_EQ(set.size(), 1); -} - -TEST(ir_LayoutSet, set_operators) -{ - LayoutSet set1{Layout::NCHW}; - LayoutSet set2{Layout::NHWC}; - LayoutSet set3 = set1 | set2; - - ASSERT_EQ(set3.size(), 2); - - ASSERT_EQ((set3 - set1).size(), 1); - ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true); - ASSERT_EQ((set3 - set2).size(), 1); - ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true); - ASSERT_EQ((set3 - set3).size(), 0); - - ASSERT_EQ((set3 & set1).size(), 1); - ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true); - ASSERT_EQ((set3 & set2).size(), 1); - ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true); - ASSERT_EQ((set1 & set2).size(), 0); -} diff --git a/runtime/onert/test/core/ir/OperandIndexSet.cc b/runtime/onert/test/core/ir/OperandIndexSet.cc deleted file mode 100644 index c363e54..0000000 --- a/runtime/onert/test/core/ir/OperandIndexSet.cc +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "ir/OperandIndexSequence.h" - -using onert::ir::OperandIndex; -using onert::ir::OperandIndexSequence; - -TEST(ir_OperandIndexSequence, neg_append) -{ - OperandIndexSequence iset{0, 2, 4, 8}; - - ASSERT_EQ(iset.size(), 4); - - iset.append(OperandIndex{10}); - - ASSERT_EQ(iset.size(), 5); - - onert::ir::IOIndex index1{1}; - onert::ir::IOIndex index2{4}; - - ASSERT_EQ(iset.at(index1), 2); - ASSERT_EQ(iset.at(index2), 10); - - ASSERT_TRUE(iset.contains(OperandIndex{2})); - ASSERT_TRUE(iset.contains(OperandIndex{10})); - ASSERT_FALSE(iset.contains(OperandIndex{11})); -} - -TEST(graph_OperandIndexSequence, neg_replace) -{ - OperandIndexSequence iset{0, 1, 2, 3}; - - iset.replace(OperandIndex{1}, OperandIndex{9}); - ASSERT_FALSE(iset.contains(OperandIndex{1})); - ASSERT_TRUE(iset.contains(OperandIndex{9})); -} diff --git a/runtime/onert/test/core/ir/OperandSet.cc b/runtime/onert/test/core/ir/OperandSet.cc deleted file mode 100644 index 6cf9c88..0000000 --- a/runtime/onert/test/core/ir/OperandSet.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "ir/Operands.h" - -TEST(ir_Operands, neg_set_test) -{ - onert::ir::Operands set; - - onert::ir::Shape shape0{1, 2, 3}; - - onert::ir::Shape shape1(4); - shape1.dim(0) = 10; - shape1.dim(1) = 20; - shape1.dim(2) = 30; - shape1.dim(3) = 40; - - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - set.emplace(shape0, type); - set.emplace(shape1, type); - - ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true); - ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true); - ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false); - - ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1); - ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2); - ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3); -} diff --git a/runtime/onert/test/core/ir/OperationSet.cc b/runtime/onert/test/core/ir/OperationSet.cc deleted file mode 100644 index 4a17eeb..0000000 --- a/runtime/onert/test/core/ir/OperationSet.cc +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "MockNode.h" -#include "ir/Operations.h" - -using onert::ir::Operation; -using onert::ir::OperationIndex; -using onert::ir::Operations; - -TEST(ir_Operations, basic) -{ - Operations ops; - ops.push(std::unique_ptr(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); - OperationIndex idx{0u}; - ASSERT_EQ(ops.at(idx).getInputs().size(), 4); - ASSERT_EQ(ops.at(idx).getOutputs().size(), 3); -} - -TEST(ir_Operations, neg_at) -{ - Operations ops; - ops.push(std::unique_ptr(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7}))); - OperationIndex idx{99u}; - EXPECT_THROW(ops.at(idx), std::out_of_range); -} diff --git a/runtime/onert/test/core/ir/SetIO.cc b/runtime/onert/test/core/ir/SetIO.cc deleted file mode 100644 index 68b4773..0000000 --- a/runtime/onert/test/core/ir/SetIO.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "ir/Graph.h" -#include "ir/Index.h" -#include "ir/OperandIndexSequence.h" -#include "ir/operation/Conv2D.h" -#include "ir/operation/Concat.h" - -#include - -#include - -using Index = onert::ir::IOIndex; -using IndexSet = onert::ir::OperandIndexSequence; - -TEST(ir_Operation_setIO, operation_setIO_conv) -{ - onert::ir::Graph graph; - - onert::ir::Shape shape{3}; - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - // Add Conv - using Graph = onert::ir::operation::Conv2D; - - auto input_operand = graph.addOperand(shape, type); - auto kernel_operand = graph.addOperand(shape, type); - auto bias_operand = graph.addOperand(shape, type); - IndexSet inputs{input_operand, kernel_operand, bias_operand}; - - Graph::Param conv_params; - conv_params.padding.type = onert::ir::PaddingType::SAME; - conv_params.stride.horizontal = 1; - conv_params.stride.vertical = 1; - conv_params.activation = onert::ir::Activation::NONE; - - auto output_operand = graph.addOperand(shape, type).value(); - IndexSet outputs{output_operand}; - - auto conv = std::make_unique(inputs, outputs, conv_params); - - ASSERT_NE(conv, nullptr); - ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); - conv->setInputs({8, 9, 10}); - ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value()); - ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8); -} - -TEST(ir_Operation_setIO, neg_operation_setIO_concat) -{ - onert::ir::Graph graph; - - onert::ir::Shape shape{3}; - - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - using Graph = onert::ir::operation::Concat; - - // Add Concat - IndexSet inputs; - for (int i = 0; i < 6; ++i) - { - inputs.append(graph.addOperand(shape, type)); - } - - Graph::Param concat_params{0}; - - auto output_operand = graph.addOperand(shape, type).value(); - IndexSet outputs{output_operand}; - - auto concat = std::make_unique(inputs, outputs, concat_params); - - ASSERT_NE(concat, nullptr); - 
ASSERT_EQ(concat->getInputs().size(), 6); - ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); - - concat->setInputs({80, 6, 9, 11}); - ASSERT_EQ(concat->getInputs().size(), 4); - ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value()); - ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80); - ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9); - ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range); -} diff --git a/runtime/onert/test/core/ir/Shape.cc b/runtime/onert/test/core/ir/Shape.cc deleted file mode 100644 index c24aeda..0000000 --- a/runtime/onert/test/core/ir/Shape.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include - -TEST(ShapeTest, basic_test) -{ - { - onert::ir::Shape shape(3); - - shape.dim(0) = 1; - shape.dim(1) = 2; - shape.dim(2) = 3; - - ASSERT_EQ(shape.rank(), 3); - ASSERT_EQ(shape.num_elements(), 6); - ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); - ASSERT_EQ(shape.hasUnspecifiedDims(), false); - } - { - onert::ir::Shape shape; // scalar or rank is unspecified - - ASSERT_EQ(shape.rank(), 0); - ASSERT_EQ(shape.num_elements(), 1); - ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true); - ASSERT_EQ(shape.hasUnspecifiedDims(), false); - } -} - -TEST(ShapeTest, neg_basic_test) -{ - { - onert::ir::Shape shape(2); - - shape.dim(0) = 1; - shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM; - - ASSERT_EQ(shape.rank(), 2); - ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false); - ASSERT_EQ(shape.hasUnspecifiedDims(), true); - EXPECT_ANY_THROW(shape.num_elements()); - } -} diff --git a/runtime/onert/test/core/ir/UseDef.cc b/runtime/onert/test/core/ir/UseDef.cc deleted file mode 100644 index 47c98f9..0000000 --- a/runtime/onert/test/core/ir/UseDef.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "ir/Graph.h" -#include "ir/verifier/Verifier.h" -#include -#include "MockNode.h" - -#include - -namespace -{ - -using IndexSet = onert::ir::OperandIndexSequence; -using Mock = onert_test::ir::SimpleMock; - -} // namespace - -TEST(ir_Operand, neg_usedef) -{ - onert::ir::Graph graph; - onert::ir::verifier::DAGChecker verifier; - - onert::ir::Shape shape(3); - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - // Model Input/Output - auto input_operand = graph.addOperand(shape, type); - auto output_operand = graph.addOperand(shape, type); - - graph.addInput(input_operand); - graph.addOutput(output_operand); - - // MockNode1 - auto operand_index1 = graph.addOperand(shape, type); - auto mocknode_index1 = - graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index1})); - - // MockNode2 - auto operand_index2 = graph.addOperand(shape, type); - auto mocknode_index2 = - graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index2})); - - // MockNode3(two input) - auto multiinput_index = graph.addOperation( - std::make_unique(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); - - graph.verify(); - - ASSERT_TRUE(verifier.verify(graph)); - - // Check def - ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1); - ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2); - ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index); - - ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2); - ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index); - - // Check use - ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true); - ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true); - ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false); - 
ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true); - ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true); - - ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2); - ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1); - ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0); -} diff --git a/runtime/onert/test/core/ir/Verifier.cc b/runtime/onert/test/core/ir/Verifier.cc deleted file mode 100644 index b4be2d9..0000000 --- a/runtime/onert/test/core/ir/Verifier.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "ir/Operation.h" -#include "ir/Graph.h" -#include "ir/verifier/Verifier.h" -#include -#include "ir/Operand.h" -#include "MockNode.h" - -using IndexSet = onert::ir::OperandIndexSequence; -using Mock = onert_test::ir::SimpleMock; - -TEST(Verifier, dag_checker) -{ - onert::ir::Graph graph; - - onert::ir::Shape shape{3}; - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - auto operand1 = graph.addOperand(shape, type); - auto operand2 = graph.addOperand(shape, type); - - graph.addInput(operand1); - graph.addOutput(operand2); - - graph.addOperation(std::make_unique(IndexSet{operand1}, IndexSet{operand2})); - - onert::ir::verifier::DAGChecker verifier; - - ASSERT_TRUE(verifier.verify(graph)); -} - -TEST(Verifier, neg_edge_consistency_checker_1) -{ - onert::ir::Graph graph; - - onert::ir::Shape shape{3}; - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - auto operand1 = graph.addOperand(shape, type); - auto operand2 = graph.addOperand(shape, type); - - graph.addInput(operand1); - graph.addOutput(operand2); - - auto mock_op = std::make_unique(IndexSet{operand1}, IndexSet{operand2}); - auto op_ind = graph.addOperation(std::move(mock_op)); - - graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone - - onert::ir::verifier::EdgeChecker verifier; - ASSERT_FALSE(verifier.verify(graph)); -} - -TEST(Verifier, neg_edge_consistency_checker_2) -{ - onert::ir::Graph graph; - - onert::ir::Shape shape{3}; - onert::ir::TypeInfo type{onert::ir::DataType::INT32}; - - auto operand1 = graph.addOperand(shape, type); - auto operand2 = graph.addOperand(shape, type); - - graph.addInput(operand1); - graph.addOutput(operand2); - - auto mock_op = std::make_unique(IndexSet{operand1}, IndexSet{operand2}); - auto mock_op_ptr = mock_op.get(); - auto op_ind = graph.addOperation(std::move(mock_op)); - - mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone - - onert::ir::verifier::EdgeChecker verifier; - 
ASSERT_FALSE(verifier.verify(graph)); -} diff --git a/runtime/onert/test/core/util/Index.cc b/runtime/onert/test/core/util/Index.cc deleted file mode 100644 index 2d110e3..0000000 --- a/runtime/onert/test/core/util/Index.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "util/Index.h" - -using Index = ::onert::util::Index; - -TEST(Index, neg_index_test) -{ - Index idx1{1u}; - Index idx2{2u}; - Index idx3{idx1}; - - ASSERT_EQ(idx1, 1); - ASSERT_EQ(idx1, 1u); - ASSERT_EQ(idx1.value(), 1u); - ASSERT_NE(idx1, idx2); - ASSERT_EQ(idx1, idx3); -} diff --git a/runtime/onert/test/core/util/ObjectManager.cc b/runtime/onert/test/core/util/ObjectManager.cc deleted file mode 100644 index 78f044e..0000000 --- a/runtime/onert/test/core/util/ObjectManager.cc +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "util/ObjectManager.h" -#include "util/Index.h" - -using namespace onert; - -struct TestTag; -using Index = typename util::Index; - -TEST(ObjectManager, emplace) -{ - util::ObjectManager man; - - auto index = man.emplace(100); - ASSERT_EQ(man.at(index), 100); -} - -TEST(ObjectManager, neg_remove_1) -{ - util::ObjectManager man; - - Index index = man.emplace(100); - ASSERT_TRUE(man.exist(index)); - ASSERT_EQ(man.at(index), 100); - - man.remove(index); - ASSERT_FALSE(man.exist(index)); -} - -TEST(ObjectManager, neg_remove_2) -{ - util::ObjectManager man; - - auto index0 = man.emplace(100); - auto index1 = man.emplace(200); - ASSERT_TRUE(man.exist(index0)); - ASSERT_EQ(man.at(index0), 100); - ASSERT_TRUE(man.exist(index1)); - ASSERT_EQ(man.at(index1), 200); - - man.remove(index0); - ASSERT_FALSE(man.exist(index0)); - ASSERT_TRUE(man.exist(index1)); - ASSERT_EQ(man.at(index1), 200); -} - -TEST(ObjectManager, push) -{ - util::ObjectManager man; - - // Not specify index - auto index = man.push(std::make_unique(100)); - ASSERT_EQ(man.at(index), 100); - - // Specify index - auto index2 = man.push(std::make_unique(200), Index{33}); - ASSERT_EQ(index2.value(), 33); - ASSERT_EQ(man.at(index2), 200); - - auto index3 = man.push(std::make_unique(300)); - // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) - ASSERT_EQ(index3.value(), 34); - ASSERT_EQ(man.at(index3), 300); - - auto index4 = man.push(std::make_unique(400), Index{22}); - ASSERT_EQ(index4.value(), 22); - ASSERT_EQ(man.at(index4), 400); - - auto index5 = man.push(std::make_unique(500)); - // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1) - ASSERT_EQ(index5.value(), 35); - ASSERT_EQ(man.at(index5), 500); -} - -TEST(ObjectManager, neg_push) -{ - util::ObjectManager man; - - // Specify index - auto index = 
man.push(std::make_unique(100), Index{55}); - ASSERT_EQ(index.value(), 55); - ASSERT_EQ(man.at(index), 100); - - // Specify the same index - auto index2 = man.push(std::make_unique(200), Index{55}); - ASSERT_FALSE(index2.valid()); -} - -static const uint32_t kMaxUInt32 = std::numeric_limits::max(); - -TEST(ObjectManager, neg_push_undefined_index) -{ - util::ObjectManager man; - - // Try inserting invalid(undefined) index - auto index = man.push(std::make_unique(100), Index{kMaxUInt32}); - ASSERT_FALSE(index.valid()); - ASSERT_EQ(man.size(), 0); -} - -TEST(ObjectManager, neg_push_max_index) -{ - util::ObjectManager man; - - // Insert an object with maximum valid index - auto index = man.push(std::make_unique(100), Index{kMaxUInt32 - 1}); - ASSERT_EQ(index.value(), kMaxUInt32 - 1); - ASSERT_EQ(man.at(index), 100); - ASSERT_EQ(man.size(), 1); - - // Reached to the final index so next push/emplace must fail - auto index2 = man.push(std::make_unique(200)); - ASSERT_EQ(man.size(), 1); - ASSERT_FALSE(index2.valid()); -} - -TEST(ObjectManager, neg_emplace_max_index) -{ - util::ObjectManager man; - - // Insert an object with maximum valid index - auto index = man.push(std::make_unique(100), Index{kMaxUInt32 - 1}); - ASSERT_EQ(index.value(), kMaxUInt32 - 1); - ASSERT_EQ(man.at(index), 100); - ASSERT_EQ(man.size(), 1); - - // Reached to the final index so next push/emplace must fail - auto index3 = man.emplace(200); - ASSERT_EQ(man.size(), 1); - ASSERT_FALSE(index3.valid()); -} - -TEST(ObjectManager, const_iterate) -{ - util::ObjectManager man; - - auto index0 = man.emplace(100); - auto index1 = man.emplace(200); - auto index2 = man.emplace(300); - - int sum = 0; - man.iterate([&](const Index &index, const int &val) { sum += val; }); - ASSERT_EQ(sum, 600); -} - -TEST(ObjectManager, non_const_iterate) -{ - util::ObjectManager man; - - auto index0 = man.emplace(100); - auto index1 = man.emplace(200); - auto index2 = man.emplace(300); - - man.iterate([&](const Index &index, int 
&val) { val += 1; }); - ASSERT_EQ(man.at(index0), 101); - ASSERT_EQ(man.at(index1), 201); - ASSERT_EQ(man.at(index2), 301); -} - -TEST(ObjectManager, set) -{ - util::ObjectManager man; - auto index = man.set(Index{1}, std::make_unique(100)); // Insert - ASSERT_EQ(index, Index{1}); - auto index2 = man.set(index, std::make_unique(200)); // Overwrite - ASSERT_EQ(index2, index); - ASSERT_EQ(man.at(index2), 200); -} - -TEST(ObjectManager, neg_set) -{ - auto v = std::make_unique(100); - util::ObjectManager man; - auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index - ASSERT_EQ(index, Index{}); - ASSERT_FALSE(index.valid()); - ASSERT_NE(v, nullptr); // v must be kept when failure -} - -TEST(ObjectManager, getRawPtr) -{ - auto v = std::make_unique(100); - auto v_ptr = v.get(); - util::ObjectManager man; - auto index = man.push(std::move(v)); - ASSERT_EQ(v_ptr, man.getRawPtr(index)); -} - -TEST(ObjectManager, neg_getRawPtr) -{ - util::ObjectManager man; - auto ptr = man.getRawPtr(Index{1}); - ASSERT_EQ(ptr, nullptr); -} diff --git a/runtime/onert/test/core/util/ShapeInference.cc b/runtime/onert/test/core/util/ShapeInference.cc deleted file mode 100644 index 2ecaa28..0000000 --- a/runtime/onert/test/core/util/ShapeInference.cc +++ /dev/null @@ -1,545 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "ir/Layout.h" -#include "util/ShapeInference.h" - -using namespace onert::ir; - -TEST(ShapeInference, Elementwise) -{ - Shape lhs_shape{1, 299, 299, 3}; - Shape rhs_shape{3}; - auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.dim(0), 1); - ASSERT_EQ(infered_out_shape.dim(1), 299); - ASSERT_EQ(infered_out_shape.dim(2), 299); - ASSERT_EQ(infered_out_shape.dim(3), 3); -} - -TEST(ShapeInference, neg_Elementwise) -{ - Shape lhs_shape{1, 299, 299, 3}; - Shape rhs_shape{5, 3}; - ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error); -} - -TEST(ShapeInference, Pool2DNodeSame) -{ - Shape in_shape{10, 6, 12, 20}; - Stride stride{3, 7}; - Padding padding{PaddingType::SAME}; - - operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; - auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); - - operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; - infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); -} - -TEST(ShapeInference, Pool2DNodeValid) -{ - Shape in_shape{10, 6, 12, 20}; - Stride stride{3, 7}; - Padding 
padding{PaddingType::VALID}; - - operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; - auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); - - operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; - infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); -} - -TEST(ShapeInference, Pool2DNodeExplicit) -{ - Shape in_shape{10, 3, 5, 20}; - - Stride stride{3, 7}; - Padding padding{4, 3, 2, 1}; - - operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; - auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); - - operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; - infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); -} - -TEST(ShapeInference, neg_Pool2DNode_InvalidStride) -{ - Shape in_shape{10, 6, 12, 20}; - Stride stride{0, 7}; - Padding padding{PaddingType::SAME}; - - operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; - ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param), - std::runtime_error); -} - -TEST(ShapeInference, Conv2D) -{ - Shape in_shape{10, 6, 12, 20}; - Shape ker_shape{30, 3, 6, 20}; - - operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE, - Dilation{1, 1}}; - auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); - - param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE, - Dilation{1, 1}}; - infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); - - param = - operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}}; - infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); -} - -TEST(ShapeInference, neg_Conv2D_InvalidStride) -{ - Shape in_shape{10, 6, 12, 20}; - Shape ker_shape{30, 3, 6, 20}; - - operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE, - Dilation{1, 1}}; - ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param), - std::runtime_error); -} - -TEST(ShapeInference, DepthwiseConv2D) -{ - Shape in_shape{10, 6, 12, 20}; - Shape ker_shape{1, 3, 6, 60}; - - operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3, - Activation::NONE, Dilation{1, 1}}; - auto infered_out_shape = - onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); - - param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3, - Activation::NONE, Dilation{1, 1}}; - infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); - - param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE, - Dilation{1, 1}}; - infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); - - ASSERT_EQ(infered_out_shape.rank(), 4); - 
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2); - ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60); -} - -TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride) -{ - Shape in_shape{10, 6, 12, 20}; - Shape ker_shape{1, 3, 6, 60}; - - operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3, - Activation::NONE, Dilation{1, 1}}; - ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param), - std::runtime_error); -} - -TEST(ShapeInference, Concat) -{ - { - Shape in1{10, 20, 30, 3, 50}; - Shape in2{10, 20, 30, 2, 50}; - Shape in3{10, 20, 30, 2, 50}; - - operation::Concat::Param param{3}; - auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param); - - ASSERT_EQ(infered_out_shape.rank(), 5); - ASSERT_EQ(infered_out_shape.dim(0), 10); - ASSERT_EQ(infered_out_shape.dim(1), 20); - ASSERT_EQ(infered_out_shape.dim(2), 30); - ASSERT_EQ(infered_out_shape.dim(3), 7); - ASSERT_EQ(infered_out_shape.dim(4), 50); - } - { - // case 1. when axis < 0 - Shape in1{10, 20, 2}; - Shape in2{10, 20, 3}; - - operation::Concat::Param param{-1}; - auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); - - ASSERT_EQ(infered_out_shape.rank(), 3); - ASSERT_EQ(infered_out_shape.dim(0), 10); - ASSERT_EQ(infered_out_shape.dim(1), 20); - ASSERT_EQ(infered_out_shape.dim(2), 5); - } - { - // case 2. 
when axis < 0 - Shape in1{2, 20, 2}; - Shape in2{3, 20, 2}; - - operation::Concat::Param param{-3}; - auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param); - - ASSERT_EQ(infered_out_shape.rank(), 3); - ASSERT_EQ(infered_out_shape.dim(0), 5); - ASSERT_EQ(infered_out_shape.dim(1), 20); - ASSERT_EQ(infered_out_shape.dim(2), 2); - } -} - -TEST(ShapeInference, neg_Concat) -{ - { - operation::Concat::Param param{2}; - Shape in1{10, 1, 3}; - Shape in2{10, 2, 4}; // dim[1] should be 1 but 2 - - EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); - } - { // wrong rank - operation::Concat::Param param{2}; - Shape in1{10, 2, 3, 4}; - Shape in2{10, 2, 4}; // rank should be 4 - - EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param)); - } -} - -TEST(ShapeInference, ExpandDims) -{ - Shape in_shape{30, 40}; - - auto check = [&](int32_t axis, Shape &expected) { - auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis); - - ASSERT_EQ(actual.rank(), 3); - for (int32_t dim = 0; dim < expected.rank(); dim++) - ASSERT_EQ(actual.dim(dim), expected.dim(dim)); - }; - - { // boundary - int32_t axis = 0; - Shape expected{1, 30, 40}; - check(axis, expected); - } - { // boundary - int32_t axis = 2; - Shape expected{30, 40, 1}; - check(axis, expected); - } - { // inside - int32_t axis = 1; - Shape expected{30, 1, 40}; - check(axis, expected); - } - { // negative boundary - int32_t axis = -1; - Shape expected{30, 40, 1}; - check(axis, expected); - } - { // negative boundary - int32_t axis = -3; - Shape expected{1, 30, 40}; - check(axis, expected); - } -} - -TEST(ShapeInference, neg_ExpandDims) -{ - Shape in_shape{30, 40}; - - { // over boundary - int32_t axis = 3; - ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error); - } - { // over boundary - int32_t axis = -4; - ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), 
std::runtime_error); - } -} - -TEST(ShapeInference, FullyConnected) -{ - Shape in_shape{3, 4, 5, 6}; - Shape ker_shape{3, 10}; - auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape); - - ASSERT_EQ(infered_out_shape.rank(), 2); - ASSERT_EQ(infered_out_shape.dim(0), 36); - ASSERT_EQ(infered_out_shape.dim(1), 3); -} - -TEST(ShapeInference, Transpose) -{ - auto check = [&](Shape &in_shape, std::vector perm, Shape &expected) { - // pre-conditions - ASSERT_EQ(in_shape.rank(), perm.size()); - ASSERT_EQ(expected.rank(), perm.size()); - auto inferred_out_shape = - onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()); - // post-conditions - ASSERT_EQ(inferred_out_shape.rank(), perm.size()); - for (int32_t dim = 0; dim < expected.rank(); dim++) - { - ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim)); - } - }; - // check for 2-D - { - Shape in_shape{2, 3}; - std::vector perm = {1, 0}; - Shape expected{3, 2}; - // int32_t rank = 2; - check(in_shape, perm, expected); - } - // check for 3-D - { - Shape in_shape{1, 2, 3}; - std::vector perm = {2, 0, 1}; - Shape expected{3, 1, 2}; - // int32_t rank = 3; - check(in_shape, perm, expected); - } - // check for 4-D - { - Shape in_shape{1, 2, 3, 4}; - std::vector perm = {1, 3, 0, 2}; - Shape expected{2, 4, 1, 3}; - // int32_t rank = 4; - check(in_shape, perm, expected); - } -} - -TEST(ShapeInference, neg_Transpose) -{ - Shape in_shape{1, 2, 3}; - // Invalid parameter size - { - std::vector perm = {2, 0, 1, 0}; - // int32_t rank = 3; - ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), - std::runtime_error); - } - // Invalid parameter value - { - std::vector perm = {2, 0, 3}; - // int32_t rank = 3; - ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()), - std::runtime_error); - } -} - -TEST(ShapeInference, Gather) -{ - auto check = [&](Shape &input, Shape &indices, Shape &expected, 
int32_t axis) { - int rank = input.rank(); - auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank); - - ASSERT_EQ(actual.rank(), expected.rank()); - - for (int32_t dim = 0; dim < expected.rank(); dim++) - ASSERT_EQ(actual.dim(dim), expected.dim(dim)); - }; - - // check for 2-D, 3-D, axis 0 - { - Shape input{3, 4}; - Shape indices{1, 1, 2}; - int32_t axis = 0; - Shape expected{1, 1, 2, 4}; - check(input, indices, expected, axis); - } - - // check for 2-D, 3-D, axis 1 - { - Shape input{3, 4}; - Shape indices{1, 2, 1}; - int32_t axis = 1; - Shape expected{3, 1, 2, 1}; - check(input, indices, expected, axis); - } - - // check for 3-D, 2-D, axis 0 - { - Shape input{2, 3, 4}; - Shape indices{1, 2}; - int32_t axis = 0; - Shape expected{1, 2, 3, 4}; - check(input, indices, expected, axis); - } - - // check for 3-D, 2-D, axis 2 - { - Shape input{2, 3, 4}; - Shape indices{2, 1}; - int32_t axis = 2; - Shape expected{2, 3, 2, 1}; - check(input, indices, expected, axis); - } - - // check for 4D, axis 0 - { - Shape input{1, 2, 3, 4}; - Shape indices{2}; - int32_t axis = 0; - Shape expected{2, 2, 3, 4}; - check(input, indices, expected, axis); - } -} - -TEST(ShapeInference, BCQFullyConnected) -{ - auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector cluster, - Shape &expected) { - auto actual = - onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data()); - ASSERT_EQ(actual.rank(), expected.rank()); - - for (int32_t dim = 0; dim < expected.rank(); dim++) - ASSERT_EQ(actual.dim(dim), expected.dim(dim)); - }; - - { - Shape in_shape{10, 1}; - Shape cluster_shape{3, 2}; - std::vector cluster = {1, 10, 2, 10, 3, 10}; - - Shape expected{30, 1}; - check(in_shape, cluster_shape, cluster, expected); - } - - { - Shape in_shape{1, 1}; - Shape cluster_shape{1, 2}; - std::vector cluster = {3, 50}; - - Shape expected{50, 1}; - check(in_shape, cluster_shape, cluster, expected); - } -} - -TEST(ShapeInference, 
BCQGather) -{ - auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector cluster, - uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) { - operation::BCQGather::Param param{hidden_size, axis}; - auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape, - cluster.data(), rank, param); - ASSERT_EQ(actual.rank(), expected.rank()); - - for (int32_t dim = 0; dim < expected.rank(); dim++) - ASSERT_EQ(actual.dim(dim), expected.dim(dim)); - }; - - { - Shape indices_shape{5, 1}; - Shape cluster_shape{3, 2}; - std::vector cluster = {1, 10, 2, 10, 3, 10}; - uint32_t hidden_size = 10; - uint32_t axis = 0; - int rank = 2; - - Shape expected{5, 1, 10}; - check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected); - } - - { - Shape indices_shape{5, 1}; - Shape cluster_shape{3, 2}; - std::vector cluster = {1, 10, 2, 10, 3, 10}; - uint32_t hidden_size = 10; - uint32_t axis = 1; - int rank = 2; - - Shape expected{30, 5, 1}; - check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected); - } -} diff --git a/runtime/service/CMakeLists.txt b/runtime/service/CMakeLists.txt new file mode 100644 index 0000000..5ea6cda --- /dev/null +++ b/runtime/service/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectories() diff --git a/runtime/service/npud/CMakeLists.txt b/runtime/service/npud/CMakeLists.txt new file mode 100644 index 0000000..8cf5164 --- /dev/null +++ b/runtime/service/npud/CMakeLists.txt @@ -0,0 +1,21 @@ +if(NOT BUILD_NPUD) + return() +endif(NOT BUILD_NPUD) + +nnfw_find_package(GLib2.0 REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_executable(npud ${SOURCES}) +set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX) +target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS}) +target_link_libraries(npud PRIVATE nnfw_lib_misc) +target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES}) +target_link_libraries(npud 
PRIVATE ${LIB_PTHREAD}) + +if(ENVVAR_NPUD_CONFIG) + target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG) +endif(ENVVAR_NPUD_CONFIG) + +install(TARGETS npud DESTINATION bin) diff --git a/runtime/service/npud/core/Server.cc b/runtime/service/npud/core/Server.cc new file mode 100644 index 0000000..5b15388 --- /dev/null +++ b/runtime/service/npud/core/Server.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Server.h" + +#include +#include + +namespace npud +{ +namespace core +{ + +std::atomic_bool Server::_isRunning(false); + +Server::Server() noexcept + : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique()) +{ +} + +void Server::run(void) +{ + VERBOSE(Server) << "Starting Server\n"; + + if (_isRunning.exchange(true)) + { + throw std::runtime_error("Mainloop is already running."); + } + + g_main_loop_run(_mainloop.get()); +} + +void Server::stop(void) +{ + VERBOSE(Server) << "Stop Server\n"; + + if (!_isRunning.load()) + { + throw std::runtime_error("Mainloop is not running"); + } + + while (!g_main_loop_is_running(_mainloop.get())) + { + std::this_thread::yield(); + } + + g_main_loop_quit(_mainloop.get()); + _isRunning = false; +} + +} // namespace core +} // namespace npud diff --git a/runtime/service/npud/core/Server.h b/runtime/service/npud/core/Server.h new file mode 100644 index 0000000..e2f37f8 --- /dev/null +++ b/runtime/service/npud/core/Server.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONE_SERVICE_NPUD_CORE_SERVER_H__ +#define __ONE_SERVICE_NPUD_CORE_SERVER_H__ + +#include "Signal.h" + +#include +#include +#include + +namespace npud +{ +namespace core +{ + +class Server +{ +public: + void run(void); + void stop(void); + + static Server &instance(void) + { + static Server server; + return server; + } + +private: + Server() noexcept; + + static std::atomic_bool _isRunning; + + std::unique_ptr _mainloop; + std::unique_ptr _signal; +}; + +} // namespace core +} // namespace npud + +#endif // __ONE_SERVICE_NPUD_CORE_SERVER_H__ diff --git a/runtime/service/npud/core/Signal.cc b/runtime/service/npud/core/Signal.cc new file mode 100644 index 0000000..085535a --- /dev/null +++ b/runtime/service/npud/core/Signal.cc @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Signal.h" + +#include "Server.h" +#include + +#include + +namespace npud +{ +namespace core +{ + +Signal::Signal(void) noexcept { init(); } + +void Signal::init(void) +{ + // NOTE Types of signals + // SIGTERM: termination request, sent to the program + // SIGSEGV: invalid memory access (segmentation fault) + // SIGINT: external interrupt, usually initiated by the user + // SIGILL: invalid program image, such as invalid instruction + // SIGABRT: abnormal termination condition, as is e.g. 
initiated by std::abort() + // SIGFPE: erroneous arithmetic operation such as divide by zero + // from https://en.cppreference.com/w/cpp/utility/program/SIG_types + std::signal(SIGTERM, handleSignal); + std::signal(SIGSEGV, handleSignal); + std::signal(SIGINT, handleSignal); + std::signal(SIGILL, handleSignal); + std::signal(SIGABRT, handleSignal); + std::signal(SIGFPE, handleSignal); +} + +void Signal::handleSignal(int signum) +{ + VERBOSE(signal) << "Signal received: " << strsignal(signum) << "(" << signum << ")\n"; + Server::instance().stop(); +} + +} // namespace core +} // namespace npud diff --git a/runtime/service/npud/core/Signal.h b/runtime/service/npud/core/Signal.h new file mode 100644 index 0000000..ffddc72 --- /dev/null +++ b/runtime/service/npud/core/Signal.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONE_SERVICE_NPUD_CORE_SIGNAL_H__ +#define __ONE_SERVICE_NPUD_CORE_SIGNAL_H__ + +namespace npud +{ +namespace core +{ + +class Signal +{ +public: + Signal() noexcept; + + void init(void); + static void handleSignal(int signum); +}; + +} // namespace core +} // namespace npud + +#endif // __ONE_SERVICE_NPUD_CORE_SIGNAL_H__ diff --git a/runtime/service/npud/core/main.cc b/runtime/service/npud/core/main.cc new file mode 100644 index 0000000..bd885b2 --- /dev/null +++ b/runtime/service/npud/core/main.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Server.h" + +#include + +using namespace npud; + +int main(int argc, const char *argv[]) +{ + auto &server = core::Server::instance(); + + VERBOSE(main) << "Starting npud\n"; + try + { + server.run(); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + return 1; + } + + VERBOSE(main) << "Finished npud\n"; + return 0; +} diff --git a/runtime/service/npud/util/Config.lst b/runtime/service/npud/util/Config.lst new file mode 100644 index 0000000..d45b373 --- /dev/null +++ b/runtime/service/npud/util/Config.lst @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CONFIG +#error Define CONFIG before including this file +#endif + +// Name | Type | Default +CONFIG(NPUD_LOG_ENABLE , bool , "0") diff --git a/runtime/service/npud/util/ConfigSource.cc b/runtime/service/npud/util/ConfigSource.cc new file mode 100644 index 0000000..7a14b02 --- /dev/null +++ b/runtime/service/npud/util/ConfigSource.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConfigSource.h" + +#include +#include +#include + +#include +#include +#include +#include + +namespace npud +{ +namespace util +{ + +using namespace nnfw::misc; + +static std::unique_ptr _source; + +void config_source(std::unique_ptr &&source) { _source = std::move(source); } + +static IConfigSource *config_source() +{ + if (!_source) + { +#ifdef ENVVAR_FOR_DEFAULT_CONFIG + // Default ConfigSource is EnvConfigSource + _source = std::make_unique(); +#else + _source = std::make_unique(); +#endif // ENVVAR_FOR_DEFAULT_CONFIG + } + return _source.get(); +} + +static std::string getConfigOrDefault(const std::string &key) +{ + static std::unordered_map defaults; + if (defaults.empty()) + { +#define CONFIG(Name, Type, Default) \ + { \ + auto name = std::string{#Name}; \ + defaults.emplace(name, std::string{Default}); \ + } + +#include "Config.lst" + +#undef CONFIG + } + + // Treat empty string and absence of the value to be the same + auto ret = config_source()->get(key); + // if not found search from defaults + if (ret.empty()) + { + auto itr = defaults.find(key); + if (itr != defaults.end()) + { + // Return the default value if exists + ret = itr->second; + } + } + + return ret; +} + +bool toBool(const std::string &val) +{ + static const std::array false_list{"0", "OFF", "FALSE", "N", "NO"}; + auto false_found = std::find(false_list.begin(), false_list.end(), val); + return false_found == false_list.end(); +} + +int toInt(const std::string &val) { return std::stoi(val); } + +bool getConfigBool(const std::string &key) +{ + auto raw = getConfigOrDefault(key); + return toBool(raw); +} + +int getConfigInt(const std::string &key) +{ + auto raw = getConfigOrDefault(key); + return toInt(raw); +} + +std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); } + +} // namespace util +} // namespace npud + +namespace npud +{ +namespace util +{ +namespace config +{ + +#define CONFIG(Name, Type, Default) const char *Name = #Name; + 
+#include "Config.lst" + +#undef CONFIG + +} // namespace config +} // namespace util +} // namespace npud diff --git a/runtime/service/npud/util/ConfigSource.h b/runtime/service/npud/util/ConfigSource.h new file mode 100644 index 0000000..f4ecc79 --- /dev/null +++ b/runtime/service/npud/util/ConfigSource.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__ +#define __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__ + +#include + +namespace npud +{ +namespace util +{ + +bool getConfigBool(const std::string &key); +int getConfigInt(const std::string &key); +std::string getConfigString(const std::string &key); + +} // namespace util +} // namespace npud + +namespace npud +{ +namespace util +{ +namespace config +{ + +#define CONFIG(Name, Type, Default) extern const char *Name; + +#include "Config.lst" + +#undef CONFIG + +} // namespace config +} // namespace util +} // namespace npud + +#endif // __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__ diff --git a/runtime/service/npud/util/Logging.h b/runtime/service/npud/util/Logging.h new file mode 100644 index 0000000..0b75b39 --- /dev/null +++ b/runtime/service/npud/util/Logging.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONE_SERVICE_NPUD_UTIL_LOGGING_H__ +#define __ONE_SERVICE_NPUD_UTIL_LOGGING_H__ + +#include +#include + +#include "ConfigSource.h" + +namespace npud +{ +namespace util +{ +namespace logging +{ +class Context +{ +public: + Context() noexcept : _enabled{false} + { + const auto env = util::getConfigBool(util::config::NPUD_LOG_ENABLE); + + if (env) + { + _enabled = true; + } + } + + static Context &get() noexcept + { + static Context ctx; + return ctx; + } + +public: + bool enabled(void) const { return _enabled; } + +private: + bool _enabled; +}; + +static Context &ctx = Context::get(); + +inline std::string decorated_name(const char *input) +{ + const int min_prefix = 16; + std::string prefix(input); + auto len_prefix = prefix.size(); + if (len_prefix > min_prefix) + return "[" + prefix + "] "; + std::string spaces((min_prefix - len_prefix) / 2, ' '); + return (len_prefix % 2 ? 
"[ " : "[") + spaces + prefix + spaces + "] "; +} +} // namespace logging +} // namespace util +} // namespace npud + +#define VERBOSE(name) \ + if (::npud::util::logging::ctx.enabled()) \ + std::cout << ::npud::util::logging::decorated_name(#name) + +#define VERBOSE_F() \ + if (::npud::util::logging::ctx.enabled()) \ + std::cout << ::npud::util::logging::decorated_name(__func__) + +#define WHEN_LOG_ENABLED(METHOD) \ + if (::npud::util::logging::ctx.enabled()) \ + do \ + { \ + METHOD; \ + } while (0) + +#endif // __ONE_SERVICE_NPUD_UTIL_LOGGING_H__ diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt index 67ac90f..c1fa308 100644 --- a/tests/nnapi/CMakeLists.txt +++ b/tests/nnapi/CMakeLists.txt @@ -7,11 +7,6 @@ if (NOT BUILD_ONERT) return() endif(NOT BUILD_ONERT) -# GCC Compiler under 6.2 is not support this test build -if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2) - return() -endif() - if (ANDROID_BOOST_ROOT) set(BOOST_ROOT ${ANDROID_BOOST_ROOT}) endif (ANDROID_BOOST_ROOT) diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc index e4e4ba1..4f1c7f9 100644 --- a/tests/nnfw_api/src/CircleGen.cc +++ b/tests/nnfw_api/src/CircleGen.cc @@ -269,6 +269,20 @@ uint32_t CircleGen::addOperatorFloorDiv(const OperatorParams ¶ms) circle::BuiltinOptions_NONE, 0); } +uint32_t CircleGen::addOperatorGreater(const OperatorParams ¶ms) +{ + auto options = circle::CreateLessOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER, + circle::BuiltinOptions_GreaterOptions, options); +} + +uint32_t CircleGen::addOperatorGreaterEqual(const OperatorParams ¶ms) +{ + auto options = circle::CreateGreaterOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER_EQUAL, + circle::BuiltinOptions_GreaterEqualOptions, options); +} + uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams ¶ms) { auto options = 
circle::CreateL2NormOptions(_fbb).Union(); @@ -283,6 +297,13 @@ uint32_t CircleGen::addOperatorLess(const OperatorParams ¶ms) circle::BuiltinOptions_LessOptions, options); } +uint32_t CircleGen::addOperatorLessEqual(const OperatorParams ¶ms) +{ + auto options = circle::CreateLessOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_LESS_EQUAL, + circle::BuiltinOptions_LessEqualOptions, options); +} + uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha) { auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union(); @@ -319,6 +340,13 @@ uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms) circle::BuiltinOptions_NegOptions, options); } +uint32_t CircleGen::addOperatorNotEqual(const OperatorParams ¶ms) +{ + auto options = circle::CreateEqualOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_NOT_EQUAL, + circle::BuiltinOptions_NotEqualOptions, options); +} + uint32_t CircleGen::addOperatorOneHot(const OperatorParams ¶ms, int32_t axis) { auto options = circle::CreateOneHotOptions(_fbb, axis).Union(); diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h index 062a8d3..d780eb1 100644 --- a/tests/nnfw_api/src/CircleGen.h +++ b/tests/nnfw_api/src/CircleGen.h @@ -174,16 +174,20 @@ public: uint32_t addOperatorFullyConnected(const OperatorParams ¶ms, circle::FullyConnectedOptionsWeightsFormat weights_format = circle::FullyConnectedOptionsWeightsFormat_DEFAULT); + uint32_t addOperatorGreater(const OperatorParams ¶ms); + uint32_t addOperatorGreaterEqual(const OperatorParams ¶ms); uint32_t addOperatorIf(const OperatorParams ¶ms, uint32_t then_subg, uint32_t else_subg); uint32_t addOperatorInstanceNorm(const OperatorParams ¶ms, float epsilon, circle::ActivationFunctionType actfn); uint32_t addOperatorL2Normalization(const OperatorParams ¶ms); uint32_t addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha); uint32_t addOperatorLess(const 
OperatorParams ¶ms); + uint32_t addOperatorLessEqual(const OperatorParams ¶ms); uint32_t addOperatorLogSoftmax(const OperatorParams ¶ms); uint32_t addOperatorMul(const OperatorParams ¶ms, circle::ActivationFunctionType actfn); uint32_t addOperatorMean(const OperatorParams ¶ms, bool keep_dims); uint32_t addOperatorNeg(const OperatorParams ¶ms); + uint32_t addOperatorNotEqual(const OperatorParams ¶ms); uint32_t addOperatorOneHot(const OperatorParams ¶ms, int32_t axis); uint32_t addOperatorPad(const OperatorParams ¶ms); uint32_t addOperatorPadV2(const OperatorParams ¶ms); diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h index eee50d1..90b7cfc 100644 --- a/tests/nnfw_api/src/GenModelTest.h +++ b/tests/nnfw_api/src/GenModelTest.h @@ -398,7 +398,9 @@ protected: // Check output tensor values auto &ref_output = ref_outputs[i]; auto &output = _so.outputs[i]; - ASSERT_EQ(output.size(), ref_output.size()); + auto expected_tensor_size = ref_output.size(); + auto actual_tensor_size = output.size(); + ASSERT_EQ(expected_tensor_size, actual_tensor_size) << "Output #" << i; switch (ti.dtype) { @@ -419,9 +421,10 @@ protected: // TODO better way for handling FP error? 
for (uint32_t e = 0; e < ref_output.size() / sizeof(float); e++) { - float refval = reinterpret_cast(ref_output.data())[e]; - float val = reinterpret_cast(output.data())[e]; - EXPECT_NEAR(refval, val, 0.001) << "Output #" << i << ", Element Index : " << e; + float expected = reinterpret_cast(ref_output.data())[e]; + float actual = reinterpret_cast(output.data())[e]; + EXPECT_NEAR(expected, actual, 0.001) + << "Output #" << i << ", Element Index : " << e; } break; case NNFW_TYPE_TENSOR_INT64: @@ -445,9 +448,9 @@ private: { for (uint32_t e = 0; e < ref_buf.size() / sizeof(T); e++) { - T ref = reinterpret_cast(ref_buf.data())[e]; - T act = reinterpret_cast(act_buf.data())[e]; - EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e; + T expected = reinterpret_cast(ref_buf.data())[e]; + T actual = reinterpret_cast(act_buf.data())[e]; + EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e; } } @@ -457,10 +460,10 @@ private: for (uint32_t e = 0; e < ref_buf.size() / sizeof(uint8_t); e++) { uint8_t ref_raw = reinterpret_cast(ref_buf.data())[e]; - bool ref = (ref_raw != 0 ? true : false); + bool expected = (ref_raw != 0 ? true : false); uint8_t act_raw = reinterpret_cast(act_buf.data())[e]; - bool act = (act_raw != 0 ? true : false); - EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e; + bool actual = (act_raw != 0 ? 
true : false); + EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e; } } diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.test.cc similarity index 100% rename from tests/nnfw_api/src/GenModelTests.cc rename to tests/nnfw_api/src/GenModelTests.test.cc diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc similarity index 100% rename from tests/nnfw_api/src/ModelTestDynamicTensor.cc rename to tests/nnfw_api/src/ModelTestDynamicTensor.test.cc diff --git a/tests/nnfw_api/src/ModelTestInputReshaping.cc b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc similarity index 100% rename from tests/nnfw_api/src/ModelTestInputReshaping.cc rename to tests/nnfw_api/src/ModelTestInputReshaping.test.cc diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.test.cc similarity index 100% rename from tests/nnfw_api/src/RegressionTests.cc rename to tests/nnfw_api/src/RegressionTests.test.cc diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestAddModelLoaded.cc rename to tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc rename to tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc diff --git a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc rename to tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc diff --git a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc 
b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestMultipleSessions.cc rename to tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc diff --git a/tests/nnfw_api/src/ValidationTestPipelineSession.cc b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestPipelineSession.cc rename to tests/nnfw_api/src/ValidationTestPipelineSession.test.cc diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestSessionCreated.cc rename to tests/nnfw_api/src/ValidationTestSessionCreated.test.cc diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc similarity index 100% rename from tests/nnfw_api/src/ValidationTestSingleSession.cc rename to tests/nnfw_api/src/ValidationTestSingleSession.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc deleted file mode 100644 index c210229..0000000 --- a/tests/nnfw_api/src/one_op_tests/Add.cc +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -#include - -TEST_F(GenModelTest, OneOp_Add_VarToConst) -{ - CircleGen cgen; - std::vector rhs_data{5, 4, 7, 4}; - uint32_t rhs_buf = cgen.addBuffer(rhs_data); - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf}); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}}, {{6, 7, 9, 8}})); - _context->addTestCase(uniformTCD({{0, 1, 2, 3}}, {{5, 5, 9, 7}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Add_VarToVar) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Add_VarToVarUint8) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = 
std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Add_VarToVarInt8) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2); - int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Add_VarToVarSame) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}}, {{2, 6, 4, 
8}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Add_VarToVarSize0) -{ - CircleGen cgen; - int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE); - cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({a, b, c}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{}, {}, {}}, {{}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_InvalidType) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = 
std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst) -{ - CircleGen cgen; - std::vector rhs_data{5, 4, 0, 7, 4, 0}; - uint32_t rhs_buf = cgen.addBuffer(rhs_data); - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf}); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_OneOperand) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, 
neg_OneOp_Add_ThreeOperands) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_NoOutput) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, - static_cast(128) /* Invalid value*/); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}})); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape) -{ - CircleGen cgen; - int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32}); - int m = 
cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE); - cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({a, b, c}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailCompile(); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, net_OneOp_Add_VarToVarInt16) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6); - cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - // _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Add.test.cc b/tests/nnfw_api/src/one_op_tests/Add.test.cc new file mode 100644 index 0000000..9fc0e86 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Add.test.cc @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +TEST_F(GenModelTest, OneOp_Add_VarToConst) +{ + CircleGen cgen; + std::vector rhs_data{5, 4, 7, 4}; + uint32_t rhs_buf = cgen.addBuffer(rhs_data); + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}}, {{6, 7, 9, 8}})); + _context->addTestCase(uniformTCD({{0, 1, 2, 3}}, {{5, 5, 9, 7}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Add_VarToVar) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Add_VarToVarUint8) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, 
circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Add_VarToVarInt8) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2); + int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Add_VarToVarSame) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + 
_context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}}, {{2, 6, 4, 8}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Add_VarToVarSize0) +{ + CircleGen cgen; + int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE); + cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({a, b, c}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{}, {}, {}}, {{}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, 
circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst) +{ + CircleGen cgen; + std::vector rhs_data{5, 4, 0, 7, 4, 0}; + uint32_t rhs_buf = cgen.addBuffer(rhs_data); + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf}); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_OneOperand) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", 
"acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_NoOutput) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, + static_cast(128) /* Invalid value*/); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}})); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape) +{ + CircleGen cgen; + int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int b = cgen.addTensor({{0}, 
circle::TensorType::TensorType_FLOAT32}); + int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32}); + int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE); + cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({a, b, c}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailCompile(); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + // _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/AddN.cc b/tests/nnfw_api/src/one_op_tests/AddN.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/AddN.cc rename to tests/nnfw_api/src/one_op_tests/AddN.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc deleted file mode 100644 index dda0986..0000000 --- a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -#include - -struct ArgMinMaxVariationParam -{ - TestCaseData tcd; - bool is_argmax = true; - circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; -}; - -class ArgMinMaxVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -// Input shape: {1, 2, 2, 1} -// Reduce axis: 1 -// Output shape: {1, 2, 1} -// Output type: Int32 -// Test with different input type and value -INSTANTIATE_TEST_CASE_P( - GenModelTest, ArgMinMaxVariation, - ::testing::Values( - // ArgMax, float input - ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), - true}, - // ArgMax, int32 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, - circle::TensorType::TensorType_INT32}, - // ArgMax, uint8 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, - circle::TensorType::TensorType_UINT8, 1.0, 1}, - // ArgMax, int8 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, - circle::TensorType::TensorType_INT8, 1.0, 1}, - // ArgMin, float input - ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), - false}, - // ArgMin, int32 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, - circle::TensorType::TensorType_INT32}, - // ArgMin, uint8 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), 
false, - circle::TensorType::TensorType_UINT8, 1.0, 1}, - // ArgMin, int8 input - ArgMinMaxVariationParam{ - TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, - circle::TensorType::TensorType_INT8, 1.0, 1})); - -TEST_P(ArgMinMaxVariation, Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{1}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT64; - std::vector axis_data{1}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0})); - _context->setBackends({"acl_cl", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32}); - int in = cgen.addTensor({{1, 2, 2, 1}, 
circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in, axis}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{} - .addInput({1, 4, 2, 3}) - .addInput({-3}) - .addOutput({1, 0})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_P(ArgMinMaxVariation, neg_InvalidAxis0) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailCompile(); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_P(ArgMinMaxVariation, neg_InvalidAxis1) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{-3}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{2}, output_type}); - param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_ArgMax_InType) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(ArgMinMaxVariation, neg_AxisType) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_FLOAT32; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(ArgMinMaxVariation, neg_paramType) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - const auto output_param = circle::TensorType::TensorType_INT64; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_param) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc new file mode 100644 index 0000000..1321552 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include + +struct ArgMinMaxVariationParam +{ + TestCaseData tcd; + bool is_argmax = true; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class ArgMinMaxVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: {1, 2, 2, 1} +// Reduce axis: 1 +// Output shape: {1, 2, 1} +// Output type: Int32 +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P( + GenModelTest, ArgMinMaxVariation, + ::testing::Values( + // ArgMax, float input + ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), + true}, + // ArgMax, int32 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_INT32}, + // ArgMax, uint8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_UINT8, 1.0, 1}, + // ArgMax, int8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_INT8, 1.0, 1}, + // ArgMin, float input + ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), + false}, + // ArgMin, int32 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_INT32}, + // ArgMin, uint8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_UINT8, 1.0, 1}, + // ArgMin, int8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_INT8, 1.0, 1})); + +TEST_P(ArgMinMaxVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{1}; + uint32_t axis_buf = 
cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT64; + std::vector axis_data{1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0})); + _context->setBackends({"acl_cl", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in, axis}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{} + .addInput({1, 4, 2, 3}) + .addInput({-3}) + .addOutput({1, 0})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + 
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailCompile(); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_P(ArgMinMaxVariation, neg_InvalidAxis1) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{-3}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{2}, output_type}); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_InType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(ArgMinMaxVariation, neg_AxisType) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_FLOAT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(ArgMinMaxVariation, neg_paramType) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + const auto output_param = circle::TensorType::TensorType_INT64; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_param) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc deleted file mode 100644 index 15ddac2..0000000 --- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -struct AvgPool2DParam -{ - TestCaseData tcd; - std::vector input_shape; - std::vector output_shape; - struct filter_stride - { - int32_t filter_w; - int32_t filter_h; - int32_t stride_w; - int32_t stride_h; - } param = {1, 1, 1, 1}; - struct data_type - { - circle::TensorType data_type; - float scale; - int64_t zero_point; - } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0}; - std::vector backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"}; -}; - -class AveragePool2DVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -// Test with different input type and value -INSTANTIATE_TEST_CASE_P( - GenModelTest, AveragePool2DVariation, - ::testing::Values( - // float data - AvgPool2DParam{ - uniformTCD({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}}, - // float data - large - AvgPool2DParam{uniformTCD({std::vector(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}), - {1, 18, 36, 2}, - {1, 1, 2, 2}, - {18, 18, 18, 18}}, - // uint8_t data - AvgPool2DParam{uniformTCD({{2, 6, 4, 8}}, {{5}}), - {1, 2, 2, 1}, - {1, 1, 1, 1}, - {2, 2, 2, 2}, - {circle::TensorType::TensorType_UINT8, 1.2, 3}, - {"acl_cl", "acl_neon", "cpu"}}, - // uint8_t data -large - AvgPool2DParam{ - uniformTCD({{std::vector(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}), - {1, 18, 36, 2}, - {1, 1, 2, 2}, - {18, 18, 18, 18}, - {circle::TensorType::TensorType_UINT8, 1.2, 3}, - {"acl_cl", "acl_neon", "cpu"}}, - // int8_t data - // TODO enable acl-cl, acl-neon backend - AvgPool2DParam{uniformTCD({{2, -6, 4, -8}}, {{-2}}), - {1, 2, 2, 1}, - {1, 1, 1, 1}, - {2, 2, 2, 2}, - {circle::TensorType::TensorType_INT8, 2.0, -1}, - {"cpu"}}, - // int8_t data - large - // TODO enable acl-cl, acl-neon backend - AvgPool2DParam{ - uniformTCD({{std::vector(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}), - {1, 18, 36, 2}, - {1, 1, 2, 2}, - {18, 18, 18, 18}, - {circle::TensorType::TensorType_INT8, 2.0, -1}, - {"cpu"}})); - -TEST_P(AveragePool2DVariation, Test) -{ - 
auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, - param.param.stride_h, param.param.filter_w, param.param.filter_h, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends(param.backend); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput) -{ - // 3D Tensors are not supported - CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput) -{ - // 2D Tensors are not supported - CircleGen cgen; - int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(AveragePool2DVariation, neg_InvalidPaddingType) -{ - auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, 
param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast(99), - param.param.stride_w, param.param.stride_h, param.param.filter_w, - param.param.filter_h, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1) -{ - auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, - param.param.stride_h, -1, param.param.filter_h, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2) -{ - auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, - param.param.stride_h, param.param.filter_w, 0, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(AveragePool2DVariation, neg_InvalidStrides_1) -{ - auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, 
param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h, - param.param.filter_w, param.param.filter_h, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(AveragePool2DVariation, neg_InvalidStrides_2) -{ - auto ¶m = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100, - param.param.filter_w, param.param.filter_h, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc new file mode 100644 index 0000000..8276ca4 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct AvgPool2DParam +{ + TestCaseData tcd; + std::vector input_shape; + std::vector output_shape; + struct filter_stride + { + int32_t filter_w; + int32_t filter_h; + int32_t stride_w; + int32_t stride_h; + } param = {1, 1, 1, 1}; + struct data_type + { + circle::TensorType data_type; + float scale; + int64_t zero_point; + } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0}; + std::vector backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"}; +}; + +class AveragePool2DVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P( + GenModelTest, AveragePool2DVariation, + ::testing::Values( + // float data + AvgPool2DParam{ + uniformTCD({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}}, + // float data - large + AvgPool2DParam{uniformTCD({std::vector(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}), + {1, 18, 36, 2}, + {1, 1, 2, 2}, + {18, 18, 18, 18}}, + // uint8_t data + AvgPool2DParam{uniformTCD({{2, 6, 4, 8}}, {{5}}), + {1, 2, 2, 1}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {circle::TensorType::TensorType_UINT8, 1.2, 3}, + {"acl_cl", "acl_neon", "cpu"}}, + // uint8_t data -large + AvgPool2DParam{ + uniformTCD({{std::vector(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}), + {1, 18, 36, 2}, + {1, 1, 2, 2}, + {18, 18, 18, 18}, + {circle::TensorType::TensorType_UINT8, 1.2, 3}, + {"acl_cl", "acl_neon", "cpu"}}, + // int8_t data + // TODO enable acl-cl, acl-neon backend + AvgPool2DParam{uniformTCD({{2, -6, 4, -8}}, {{-2}}), + {1, 2, 2, 1}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {circle::TensorType::TensorType_INT8, 2.0, -1}, + {"cpu"}}, + // int8_t data - large + // TODO enable acl-cl, acl-neon backend + AvgPool2DParam{ + uniformTCD({{std::vector(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}), + {1, 18, 36, 2}, + {1, 1, 2, 2}, + {18, 18, 18, 18}, 
+ {circle::TensorType::TensorType_INT8, 2.0, -1}, + {"cpu"}})); + +TEST_P(AveragePool2DVariation, Test) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, param.param.filter_w, param.param.filter_h, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backend); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput) +{ + // 3D Tensors are not supported + CircleGen cgen; + int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput) +{ + // 2D Tensors are not supported + CircleGen cgen; + int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(AveragePool2DVariation, 
neg_InvalidPaddingType) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast(99), + param.param.stride_w, param.param.stride_h, param.param.filter_w, + param.param.filter_h, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, -1, param.param.filter_h, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, param.param.filter_w, 0, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(AveragePool2DVariation, 
neg_InvalidStrides_1) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h, + param.param.filter_w, param.param.filter_h, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(AveragePool2DVariation, neg_InvalidStrides_2) +{ + auto ¶m = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100, + param.param.filter_w, param.param.filter_h, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc rename to tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Cast.cc b/tests/nnfw_api/src/one_op_tests/Cast.cc deleted file mode 100644 index 928df2d..0000000 --- a/tests/nnfw_api/src/one_op_tests/Cast.cc +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -#include - -CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, from_t}); - int out = cgen.addTensor({{1, 2, 2, 1}, to_t}); - cgen.addOperatorCast({{in}, {out}}, from_t, to_t); - cgen.setInputsAndOutputs({in}, {out}); - return cgen; -} - -TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32) -{ - CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32) -{ - CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32) -{ - CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8) -{ - CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, 
circle::TensorType_UINT8); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{} - .addInput({true, false, true, true}) - .addOutput(std::vector{1, 0, 1, 1})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Cast_BoolToInt32) -{ - CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Cast_AfterEqual) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorEqual({{lhs, rhs}, {equal_out}}); - cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL, - circle::TensorType::TensorType_FLOAT32); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0) -{ - CircleGen cgen; - int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32, - circle::TensorType::TensorType_INT32); - cgen.setInputsAndOutputs({}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, 
neg_OneOp_Cast_InvalidInputCount2) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32, - circle::TensorType::TensorType_FLOAT32); - cgen.setInputsAndOutputs({lhs, rhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32, - circle::TensorType::TensorType_FLOAT32); - cgen.setInputsAndOutputs({in}, {}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); - cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32, - circle::TensorType::TensorType_FLOAT32); - cgen.setInputsAndOutputs({in}, {out1, out2}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Cast.test.cc b/tests/nnfw_api/src/one_op_tests/Cast.test.cc new file mode 100644 index 0000000..b4cfa6f --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Cast.test.cc @@ -0,0 +1,201 @@ 
+/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, from_t}); + int out = cgen.addTensor({{1, 2, 2, 1}, to_t}); + cgen.addOperatorCast({{in}, {out}}, from_t, to_t); + cgen.setInputsAndOutputs({in}, {out}); + return cgen; +} + +TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({true, false, true, 
true}).addOutput({1, 0, 1, 1})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{} + .addInput({true, false, true, true}) + .addOutput(std::vector{1, 0, 1, 1})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_BoolToInt32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_Uint8ToFloat32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_UINT8, circle::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + // clang-format off + _context->addTestCase( + TestCaseData{}.addInput({0, 100, 200, 255}) + .addOutput({0., 100., 200., 255.})); + // clang-format on + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_Int64ToFloat32) +{ + CircleGen cgen = genSimpleCastModel(circle::TensorType_INT64, circle::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{} + .addInput({-12345, 3, 100, 2147483648}) + .addOutput({-12345., 3., 100., 2147483648.})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Cast_AfterEqual) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + int out = 
cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorEqual({{lhs, rhs}, {equal_out}}); + cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL, + circle::TensorType::TensorType_FLOAT32); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0) +{ + CircleGen cgen; + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32, + circle::TensorType::TensorType_INT32); + cgen.setInputsAndOutputs({}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32, + circle::TensorType::TensorType_FLOAT32); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32, + circle::TensorType::TensorType_FLOAT32); + cgen.setInputsAndOutputs({in}, {}); + + _context = std::make_unique(cgen.finish()); + 
_context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32, + circle::TensorType::TensorType_FLOAT32); + cgen.setInputsAndOutputs({in}, {out1, out2}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc deleted file mode 100644 index f4397ba..0000000 --- a/tests/nnfw_api/src/one_op_tests/Concat.cc +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -#include - -TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE); - cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3, - circle::ActivationFunctionType_NONE); - cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}}); - cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{1, 3, 2, 4}, {5, 4, 7, 4}}, - {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -struct ConcatVariationParam -{ - TestCaseData tcd; - circle::TensorType type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; -}; - -class ConcatVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -// Input shape: {2, 3} / {2, 3} -// Output shape: {4, 3} -INSTANTIATE_TEST_CASE_P( - GenModelTest, ConcatVariation, - ::testing::Values( - // Float - ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})}, - // Uint8 - ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 
11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), - circle::TensorType::TensorType_UINT8, 1.0f, -2}, - // Int8 - ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), - circle::TensorType::TensorType_INT8, 1.0f, -2}, - // Int16 - // TODO Enable when nnfw api support int16 type - // ConcatVariationParam{ - // uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), - // circle::TensorType::TensorType_INT16, 1.0f, 0}, - // Int32 - ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), - circle::TensorType::TensorType_INT32}, - // Int64 - ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), - circle::TensorType::TensorType_INT64})); - -TEST_P(ConcatVariation, Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); - cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({input1, input2}, {output}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) -{ - CircleGen cgen; - int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32}); - int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - std::vector axis_data{3}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - - int s_out1 = cgen.addTensor({{1, 1, 1, 5}, 
circle::TensorType::TensorType_FLOAT32}); - int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); - int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); - int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); - - int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - - int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); - - int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32}); - - cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4); - - cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - - cgen.addOperatorAdd({{c_out1, in2}, {a_out1}}, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - cgen.addOperatorAdd({{c_out2, in2}, {a_out2}}, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - cgen.addOperatorAdd({{c_out3, in2}, {a_out3}}, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - - cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3, - circle::ActivationFunctionType::ActivationFunctionType_NONE); - - cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3, - a_out1, 
a_out2, a_out3, final_out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - { - // inputs - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1 - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2 - }, - { - // outputs - {1, 2, 3, 4, 5}, // s_out1 - {6, 7, 8, 9, 10}, // s_out2 - {11, 12, 13, 14, 15}, // s_out3 - {16, 17, 18, 19, 20}, // s_out4 - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1 - {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2 - {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3 - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1 - {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2 - {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3 - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, - 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out - })); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_P(ConcatVariation, neg_InvalidAxis) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); - int axis = 2; - - cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({input1, input2}, {output}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(ConcatVariation, neg_InvalidRank) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point); - int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point); - int axis = 0; - - cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, - 
circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({input1, input2}, {output}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(ConcatVariation, neg_InvalidDimension) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point); - int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); - int axis = 0; - - cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({input1, input2}, {output}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Concat.test.cc b/tests/nnfw_api/src/one_op_tests/Concat.test.cc new file mode 100644 index 0000000..4f83603 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Concat.test.cc @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include + +TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); + int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE); + cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3, + circle::ActivationFunctionType_NONE); + cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{1, 3, 2, 4}, {5, 4, 7, 4}}, + {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +struct ConcatVariationParam +{ + TestCaseData tcd; + circle::TensorType type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class ConcatVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: {2, 3} / {2, 3} +// Output shape: {4, 3} +INSTANTIATE_TEST_SUITE_P( + GenModelTest, ConcatVariation, + ::testing::Values( + // Float + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})}, + // Uint8 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 
11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_UINT8, 1.0f, -2}, + // Int8 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT8, 1.0f, -2}, + // Int16 + // TODO Enable when nnfw api support int16 type + // ConcatVariationParam{ + // uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + // circle::TensorType::TensorType_INT16, 1.0f, 0}, + // Int32 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT32}, + // Int64 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT64})); + +TEST_P(ConcatVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); + cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({input1, input2}, {output}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) +{ + CircleGen cgen; + int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32}); + int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + std::vector axis_data{3}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + + int s_out1 = cgen.addTensor({{1, 1, 1, 5}, 
circle::TensorType::TensorType_FLOAT32}); + int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); + int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); + int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32}); + + int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + + int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32}); + + int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32}); + + cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4); + + cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + + cgen.addOperatorAdd({{c_out1, in2}, {a_out1}}, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + cgen.addOperatorAdd({{c_out2, in2}, {a_out2}}, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + cgen.addOperatorAdd({{c_out3, in2}, {a_out3}}, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + + cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3, + circle::ActivationFunctionType::ActivationFunctionType_NONE); + + cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3, + a_out1, 
a_out2, a_out3, final_out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + { + // inputs + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1 + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2 + }, + { + // outputs + {1, 2, 3, 4, 5}, // s_out1 + {6, 7, 8, 9, 10}, // s_out2 + {11, 12, 13, 14, 15}, // s_out3 + {16, 17, 18, 19, 20}, // s_out4 + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1 + {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2 + {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3 + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1 + {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2 + {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3 + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, + 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out + })); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_P(ConcatVariation, neg_InvalidAxis) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); + int axis = 2; + + cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({input1, input2}, {output}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(ConcatVariation, neg_InvalidRank) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point); + int axis = 0; + + cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, + 
circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({input1, input2}, {output}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(ConcatVariation, neg_InvalidDimension) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); + int axis = 0; + + cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({input1, input2}, {output}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.cc deleted file mode 100644 index 4f58e3d..0000000 --- a/tests/nnfw_api/src/one_op_tests/Conv2D.cc +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -TEST_F(GenModelTest, OneOp_Conv2D) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE, 1, 1); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, - {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Conv2D_Stride) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, - circle::ActivationFunctionType_NONE, 1, 1); - 
cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, - {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Conv2D_Dilation) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE, 2, 2); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, - {{-52, 7}})); - _context->setBackends({"cpu", "ruy", "xnnpack"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Conv2D_I8) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 2, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); - int weight = - cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0); - int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, 
bias_buf}, 1.0, 0); - int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{10, 10, 10}}, {{15, 38, 61}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 0, 0}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); - std::vector weight_scales = {0.5, 1, 0.5}; - std::vector weight_zeropoints = {0, 0, 0}; - int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, - weight_scales, weight_zeropoints); - int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); - int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{10, 10, 10}}, {{15, 30, 60}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Conv2D_Type) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, 
weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE, 1, 1); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0, - circle::ActivationFunctionType_NONE, 1, 1); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation) -{ - CircleGen cgen; - std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{2, 3}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 1, 1, 2}, 
circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE, 0, 0); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 2, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); - int weight = - cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17); - int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); - int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 2, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); - std::vector weight_scales = {0.5, 1, 0.5}; - std::vector weight_zeropoints = {0, 0, 10}; - int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, - weight_scales, weight_zeropoints); - int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); - int out = cgen.addTensor({{1, 1, 1, 3}, 
circle::TensorType::TensorType_FLOAT32}, 1.0, 0); - cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc new file mode 100644 index 0000000..dccf2e5 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +TEST_F(GenModelTest, OneOp_Conv2D) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_Stride) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, + circle::ActivationFunctionType_NONE, 1, 1); + 
cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 2, 2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{-52, 7}})); + _context->setBackends({"cpu", "ruy", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_I8) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 2, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); + int weight = + cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0); + int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, 
bias_buf}, 1.0, 0); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{10, 10, 10}}, {{15, 38, 61}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 0, 0}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); + std::vector weight_scales = {0.5, 1, 0.5}; + std::vector weight_zeropoints = {0, 0, 0}; + int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, + weight_scales, weight_zeropoints); + int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{10, 10, 10}}, {{15, 30, 60}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_U8_PerChannel) +{ + CircleGen cgen; + // weight + std::vector weight_data{2, 6, 2, 1, 2, 3, 2, 3, 4}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector weight_scales = {.5, 1, 2}; + std::vector weight_zeropoints = {2, 0, 1}; + int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_UINT8, weight_buf}, + weight_scales, weight_zeropoints); + // bias + std::vector bias_data{4, -8, -4}; + 
uint32_t bias_buf = cgen.addBuffer(bias_data); + int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0); + + // in and out + int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 2., 1); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2); + + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{5, 3, 7}}, {{5, 11, 24}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Type) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = 
cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 0, 0); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 2, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); + int weight = + cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17); + int bias = cgen.addTensor({{1, 1, 1, 3}, 
circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 2, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0); + std::vector weight_scales = {0.5, 1, 0.5}; + std::vector weight_zeropoints = {0, 0, 10}; + int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, + weight_scales, weight_zeropoints); + int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Cos.cc rename to tests/nnfw_api/src/one_op_tests/Cos.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc deleted file mode 100644 index a4fe884..0000000 --- a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - 
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -struct DepthToSpaceVariationParam -{ - TestCaseData tcd; - circle::TensorType type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; -}; - -class DepthToSpaceVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -// Input shape: {1, 1, 2, 4} -// Block size: 2 -// Output shape: {1, 2, 4, 1} -INSTANTIATE_TEST_CASE_P( - GenModelTest, DepthToSpaceVariation, - ::testing::Values( - // Float - DepthToSpaceVariationParam{ - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})}, - // Int32 - DepthToSpaceVariationParam{ - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), - circle::TensorType::TensorType_INT32}, - // Int64 - DepthToSpaceVariationParam{ - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), - circle::TensorType::TensorType_INT64}, - // Uint8 - DepthToSpaceVariationParam{ - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), - circle::TensorType::TensorType_UINT8, 1.0f, -2}, - // Int8 - DepthToSpaceVariationParam{ - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), - circle::TensorType::TensorType_INT8, 1.0f, -2})); - -TEST_P(DepthToSpaceVariation, Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, 
param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); - cgen.addOperatorDepthToSpace({{in}, {out}}, 2); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_P(DepthToSpaceVariation, neg_Blocksize) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); - cgen.addOperatorDepthToSpace({{in}, {out}}, -2); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc new file mode 100644 index 0000000..ad22729 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +struct DepthToSpaceVariationParam +{ + TestCaseData tcd; + circle::TensorType type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class DepthToSpaceVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: {1, 1, 2, 4} +// Block size: 2 +// Output shape: {1, 2, 4, 1} +INSTANTIATE_TEST_SUITE_P( + GenModelTest, DepthToSpaceVariation, + ::testing::Values( + // Float + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})}, + // Int32 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT32}, + // Int64 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT64}, + // Uint8 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_UINT8, 1.0f, -2}, + // Int8 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT8, 1.0f, -2})); + +TEST_P(DepthToSpaceVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); + cgen.addOperatorDepthToSpace({{in}, {out}}, 2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_P(DepthToSpaceVariation, neg_Blocksize) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); + 
cgen.addOperatorDepthToSpace({{in}, {out}}, -2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc deleted file mode 100644 index a0bdbf9..0000000 --- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{1, 2, 3, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}}, - {{71, -34, 99, -20, 91, -26, 127, -4}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier) -{ - CircleGen cgen; - std::vector weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0.5f, -0.5f}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - 
_context->addTestCase( - uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, - {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6) -{ - CircleGen cgen; - std::vector weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0.5f, -0.5f}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, - circle::ActivationFunctionType_RELU6); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, - {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3) -{ - CircleGen cgen; - std::vector weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0.0f, 0.0f}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 
2}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, - {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 0, 0, 0}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, - circle::ActivationFunctionType_NONE, 2, 2); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, - 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }}, - {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 0, 0, 0}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 6, 6, 1}, 
circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1, - circle::ActivationFunctionType_NONE, 3, 3); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, - 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, - {{4, 0, 3, 0, 0, 0, 2, 0, 1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{1, 2, 3, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector 
bias_data{0, 0, 0, 0}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, - circle::ActivationFunctionType_NONE, 0, 0); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{1, 2, 3, 4}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); - int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); - int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8}); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -// Generate a model for negative test cases -CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h, - int depth_multiplier, - circle::ActivationFunctionType actfn) -{ - CircleGen cgen; - uint32_t ker_buf = cgen.addBuffer(std::vector{0, 1, 2, 3, 0, 1, 2, 3}); - uint32_t bias_buf = 
cgen.addBuffer(std::vector{0, 0}); - int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0); - int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); - int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0); - int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0); - cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h, - depth_multiplier, actfn, 0, 0); - cgen.setInputsAndOutputs({in}, {out}); - return cgen.finish(); -} - -template struct DepthwiseConv2DQuantTestParam -{ - int stride = 1; // Used for both height and width - int input_depth = 1; - int depth_multiplier = 1; - std::vector ref_output; -}; - -template -class DepthwiseConv2DQuantTest - : public GenModelTest, - public ::testing::WithParamInterface> -{ -}; - -using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam; -using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest; - -// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU -// kernels. 
-INSTANTIATE_TEST_CASE_P( - GenModelTest, DepthwiseConv2DQuantTestU8, - ::testing::Values( - // Stride == 1 - DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, - DepthwiseConv2DQuantTestParamU8{ - 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, - DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector{0, 1, 4, 6}}, - DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector{2, 4}}, - DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector{0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DQuantTestParamU8{ - 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, - DepthwiseConv2DQuantTestParamU8{1, 12, 1, - std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, - // Stride == 2 - DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, - 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, - 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamU8{ - 2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamU8{ - 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamU8{ - 2, 8, 2, std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamU8{ - 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); - -CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier) -{ - assert(1 <= stride && stride <= 2); - assert(1 <= 
input_depth && input_depth <= 16); - assert(1 <= depth_multiplier && depth_multiplier <= 32); - - const int output_depth = input_depth * depth_multiplier; - assert(1 <= output_depth && output_depth <= 32); - - CircleGen cgen; - uint32_t ker_buf = cgen.addBuffer(std::vector{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); - uint32_t bias_buf = cgen.addBuffer(std::vector(output_depth, 0)); - int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0); - int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); - int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); - int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0); - cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, - stride, depth_multiplier, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - return cgen.finish(); -} - -TEST_P(DepthwiseConv2DQuantTestU8, Test) -{ - // Same input is used for all tests but output differs - static const std::vector input64{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, - 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; - - auto ¶m = GetParam(); - _context = std::make_unique( - genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier)); - std::vector ref_input(input64.begin(), input64.begin() + param.input_depth * 4); - _context->addTestCase(uniformTCD({ref_input}, {param.ref_output})); - 
_context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam; -using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest; - -// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU -// kernels. -INSTANTIATE_TEST_CASE_P( - GenModelTest, DepthwiseConv2DQuantTestI8, - ::testing::Values( - // Stride == 1 - DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, - DepthwiseConv2DQuantTestParamI8{ - 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, - DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector{0, 1, 4, 6}}, - DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector{2, 4}}, - DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector{0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DQuantTestParamI8{ - 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, - DepthwiseConv2DQuantTestParamI8{1, 12, 1, - std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, - // Stride == 2 - DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, - 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, - 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamI8{ - 2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamI8{ - 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamI8{ - 2, 8, 2, 
std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DQuantTestParamI8{ - 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); - -CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier) -{ - assert(1 <= stride && stride <= 2); - assert(1 <= input_depth && input_depth <= 16); - assert(1 <= depth_multiplier && depth_multiplier <= 32); - - const int output_depth = input_depth * depth_multiplier; - assert(1 <= output_depth && output_depth <= 32); - - CircleGen cgen; - uint32_t ker_buf = cgen.addBuffer(std::vector{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); - uint32_t bias_buf = cgen.addBuffer(std::vector(output_depth, 0)); - int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0); - int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0); - int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); - int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0); - cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, - stride, depth_multiplier, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - return cgen.finish(); -} - -TEST_P(DepthwiseConv2DQuantTestI8, Test) -{ - // Same input is used for all tests but output differs - static const std::vector input64{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, - 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 
2}; - - auto ¶m = GetParam(); - _context = std::make_unique( - genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier)); - std::vector ref_input(input64.begin(), input64.begin() + param.input_depth * 4); - _context->addTestCase(uniformTCD({ref_input}, {param.ref_output})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType) -{ - _context = std::make_unique(genNegTestDepthwiseConv2DModel( - static_cast(99), 1, 1, 1, circle::ActivationFunctionType_NONE)); - _context->expectFailModelLoad(); - _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); - - SUCCEED(); -} - -// TODO add other invalid operation tests like above - -TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints) -{ - CircleGen cgen; - std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8}; - uint32_t weight_buf = cgen.addBuffer(weight_data); - std::vector bias_data{0, 2}; - uint32_t bias_buf = cgen.addBuffer(bias_data); - int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0); - std::vector weight_scales = {0.5, 1}; - std::vector weight_zeropoints = {0, 10}; - int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf}, - weight_scales, weight_zeropoints); - int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf}); - int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0); - cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc new file mode 100644 index 
0000000..f82d988 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{1, 2, 3, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}}, + {{71, -34, 99, -20, 91, -26, 127, -4}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier) +{ + CircleGen cgen; + std::vector weight_data{0.0f, 1.0f, 2.0f, 
3.0f, 4.0f, 5.0f}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0.5f, -0.5f}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, + {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6) +{ + CircleGen cgen; + std::vector weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0.5f, -0.5f}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, + circle::ActivationFunctionType_RELU6); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, + {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}})); + 
_context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3) +{ + CircleGen cgen; + std::vector weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0.0f, 0.0f}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + uniformTCD({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}}, + {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"}); + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 0, 0, 0}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, 
+ circle::ActivationFunctionType_NONE, 2, 2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }}, + {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 0, 0, 0}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1, + circle::ActivationFunctionType_NONE, 3, 3); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {{4, 0, 3, 0, 0, 0, 2, 0, 1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_DepthwiseConv2D_U8_PerChannel) +{ + CircleGen cgen; + // weight + // clang-format off + std::vector weight_data{2, 1, 2, + 6, 2, 3, + 2, 3, 4, + 4, 4, 5}; + // clang-format on + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector weight_scales = {.5, 1, 2}; + std::vector weight_zeropoints = {2, 0, 1}; + int weight = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8, weight_buf}, 
+ weight_scales, weight_zeropoints); + // bias + std::vector bias_data{4, -8, -4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0); + + // in and out + int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8}, 2., 1); + int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2); + + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 1, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + // clang-format off + _context->addTestCase(uniformTCD({{5, 5, 5, // NHWC + 3, 3, 3, + 7, 7, 7, + 9, 9, 9} + }, + {{9, + 27, + 56} + })); + // clang-format on + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{1, 2, 3, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint32_t weight_buf = 
cgen.addBuffer(weight_data); + std::vector bias_data{0, 0, 0, 0}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, + circle::ActivationFunctionType_NONE, 0, 0); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{1, 2, 3, 4}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8}); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +// Generate a model for negative test cases +CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h, + int depth_multiplier, + circle::ActivationFunctionType actfn) +{ + CircleGen cgen; + uint32_t ker_buf = cgen.addBuffer(std::vector{0, 1, 2, 3, 0, 1, 2, 
3}); + uint32_t bias_buf = cgen.addBuffer(std::vector{0, 0}); + int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0); + int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); + int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0); + int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0); + cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h, + depth_multiplier, actfn, 0, 0); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +template struct DepthwiseConv2DQuantTestParam +{ + int stride = 1; // Used for both height and width + int input_depth = 1; + int depth_multiplier = 1; + std::vector ref_output; +}; + +template +class DepthwiseConv2DQuantTest + : public GenModelTest, + public ::testing::WithParamInterface> +{ +}; + +using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam; +using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest; + +// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU +// kernels. 
+INSTANTIATE_TEST_SUITE_P( + GenModelTest, DepthwiseConv2DQuantTestU8, + ::testing::Values( + // Stride == 1 + DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, + DepthwiseConv2DQuantTestParamU8{ + 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, + DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector{0, 1, 4, 6}}, + DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector{2, 4}}, + DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector{0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DQuantTestParamU8{ + 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, + DepthwiseConv2DQuantTestParamU8{1, 12, 1, + std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, + // Stride == 2 + DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, + 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, + 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamU8{ + 2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamU8{ + 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamU8{ + 2, 8, 2, std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamU8{ + 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); + +CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier) +{ + assert(1 <= stride && stride <= 2); + assert(1 <= 
input_depth && input_depth <= 16); + assert(1 <= depth_multiplier && depth_multiplier <= 32); + + const int output_depth = input_depth * depth_multiplier; + assert(1 <= output_depth && output_depth <= 32); + + CircleGen cgen; + uint32_t ker_buf = cgen.addBuffer(std::vector{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); + uint32_t bias_buf = cgen.addBuffer(std::vector(output_depth, 0)); + int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0); + int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); + int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); + int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0); + cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, + stride, depth_multiplier, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_P(DepthwiseConv2DQuantTestU8, Test) +{ + // Same input is used for all tests but output differs + static const std::vector input64{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, + 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; + + auto ¶m = GetParam(); + _context = std::make_unique( + genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier)); + std::vector ref_input(input64.begin(), input64.begin() + param.input_depth * 4); + _context->addTestCase(uniformTCD({ref_input}, {param.ref_output})); + 
_context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam; +using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest; + +// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU +// kernels. +INSTANTIATE_TEST_SUITE_P( + GenModelTest, DepthwiseConv2DQuantTestI8, + ::testing::Values( + // Stride == 1 + DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, + DepthwiseConv2DQuantTestParamI8{ + 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, + DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector{0, 1, 4, 6}}, + DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector{2, 4}}, + DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector{0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DQuantTestParamI8{ + 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, + DepthwiseConv2DQuantTestParamI8{1, 12, 1, + std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, + // Stride == 2 + DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, + 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, + 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamI8{ + 2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamI8{ + 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamI8{ + 2, 8, 2, 
std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DQuantTestParamI8{ + 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); + +CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier) +{ + assert(1 <= stride && stride <= 2); + assert(1 <= input_depth && input_depth <= 16); + assert(1 <= depth_multiplier && depth_multiplier <= 32); + + const int output_depth = input_depth * depth_multiplier; + assert(1 <= output_depth && output_depth <= 32); + + CircleGen cgen; + uint32_t ker_buf = cgen.addBuffer(std::vector{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); + uint32_t bias_buf = cgen.addBuffer(std::vector(output_depth, 0)); + int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0); + int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0); + int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); + int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0); + cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, + stride, depth_multiplier, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_P(DepthwiseConv2DQuantTestI8, Test) +{ + // Same input is used for all tests but output differs + static const std::vector input64{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, + 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 
2}; + + auto ¶m = GetParam(); + _context = std::make_unique( + genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier)); + std::vector ref_input(input64.begin(), input64.begin() + param.input_depth * 4); + _context->addTestCase(uniformTCD({ref_input}, {param.ref_output})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType) +{ + _context = std::make_unique(genNegTestDepthwiseConv2DModel( + static_cast(99), 1, 1, 1, circle::ActivationFunctionType_NONE)); + _context->expectFailModelLoad(); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); + + SUCCEED(); +} + +// TODO add other invalid operation tests like above + +TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints) +{ + CircleGen cgen; + std::vector weight_data{1, 2, 3, 4, 5, 6, 7, 8}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{0, 2}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0); + std::vector weight_scales = {0.5, 1}; + std::vector weight_zeropoints = {0, 10}; + int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf}, + weight_scales, weight_zeropoints); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf}); + int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0); + cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc similarity index 100% rename from 
tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc rename to tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Elu.cc b/tests/nnfw_api/src/one_op_tests/Elu.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Elu.cc rename to tests/nnfw_api/src/one_op_tests/Elu.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Equal.cc b/tests/nnfw_api/src/one_op_tests/Equal.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Equal.cc rename to tests/nnfw_api/src/one_op_tests/Equal.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/ExpandDims.cc rename to tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Fill.cc b/tests/nnfw_api/src/one_op_tests/Fill.cc deleted file mode 100644 index 4d5e4d8..0000000 --- a/tests/nnfw_api/src/one_op_tests/Fill.cc +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -struct FillVariationParam -{ - TestCaseData tcd; - const uint8_t *value_data = nullptr; - circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; -}; - -class FillVariation : public GenModelTest, public ::testing::WithParamInterface -{ -}; - -// value is constant -TEST_P(FillVariation, Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - - size_t value_size = - (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t); - uint32_t value_buf = cgen.addBuffer(param.value_data, value_size); - - int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int value = cgen.addTensor({{1}, param.data_type, value_buf}); - int out = cgen.addTensor({{2, 3}, param.data_type}); - cgen.addOperatorFill({{dims, value}, {out}}); - cgen.setInputsAndOutputs({dims}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -const int32_t test_int32 = 13; -const int64_t test_int64 = 1052; -const float test_float = 5.2; - -// Test with different value type -INSTANTIATE_TEST_CASE_P( - GenModelTest, FillVariation, - ::testing::Values( - // float value - FillVariationParam{ - TestCaseData{}.addInput({2, 3}).addOutput({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}), - reinterpret_cast(&test_float)}, - // int32 value - FillVariationParam{ - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13}), - reinterpret_cast(&test_int32), circle::TensorType::TensorType_INT32}, - // uint8 value - FillVariationParam{ - TestCaseData{}.addInput({2, 3}).addOutput({1052, 1052, 1052, 1052, 1052, - 1052}), - reinterpret_cast(&test_int64), circle::TensorType::TensorType_INT64})); - -TEST_F(GenModelTest, OneOp_Fill_Int64_Shape) -{ - CircleGen cgen; - std::vector value_data{1.3}; - uint32_t value_buf = cgen.addBuffer(value_data); - - int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64}); - int value = 
cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorFill({{dims, value}, {out}}); - cgen.setInputsAndOutputs({dims}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand) -{ - CircleGen cgen; - - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32}); - cgen.addOperatorFill({{in}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand) -{ - CircleGen cgen; - - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64}); - cgen.addOperatorFill({{in}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand) -{ - CircleGen cgen; - - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorFill({{in}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); - 
_context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Fill.test.cc b/tests/nnfw_api/src/one_op_tests/Fill.test.cc new file mode 100644 index 0000000..0d34056 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Fill.test.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct FillVariationParam +{ + TestCaseData tcd; + const uint8_t *value_data = nullptr; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; +}; + +class FillVariation : public GenModelTest, public ::testing::WithParamInterface +{ +}; + +// value is constant +TEST_P(FillVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + + size_t value_size = + (param.data_type == circle::TensorType::TensorType_INT64) ? 
sizeof(int64_t) : sizeof(int32_t); + uint32_t value_buf = cgen.addBuffer(param.value_data, value_size); + + int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int value = cgen.addTensor({{1}, param.data_type, value_buf}); + int out = cgen.addTensor({{2, 3}, param.data_type}); + cgen.addOperatorFill({{dims, value}, {out}}); + cgen.setInputsAndOutputs({dims}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +const int32_t test_int32 = 13; +const int64_t test_int64 = 1052; +const float test_float = 5.2; + +// Test with different value type +INSTANTIATE_TEST_SUITE_P( + GenModelTest, FillVariation, + ::testing::Values( + // float value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}), + reinterpret_cast(&test_float)}, + // int32 value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13}), + reinterpret_cast(&test_int32), circle::TensorType::TensorType_INT32}, + // uint8 value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({1052, 1052, 1052, 1052, 1052, + 1052}), + reinterpret_cast(&test_int64), circle::TensorType::TensorType_INT64})); + +TEST_F(GenModelTest, OneOp_Fill_Int64_Shape) +{ + CircleGen cgen; + std::vector value_data{1.3}; + uint32_t value_buf = cgen.addBuffer(value_data); + + int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64}); + int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf}); + int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorFill({{dims, value}, {out}}); + cgen.setInputsAndOutputs({dims}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, 
neg_OneOp_Fill_Int32_oneoperand) +{ + CircleGen cgen; + + int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorFill({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand) +{ + CircleGen cgen; + + int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64}); + cgen.addOperatorFill({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand) +{ + CircleGen cgen; + + int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorFill({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Floor.cc b/tests/nnfw_api/src/one_op_tests/Floor.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Floor.cc rename to tests/nnfw_api/src/one_op_tests/Floor.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/FloorDiv.cc b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc similarity index 100% rename from 
tests/nnfw_api/src/one_op_tests/FloorDiv.cc rename to tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/FullyConnected.cc rename to tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Greater.test.cc b/tests/nnfw_api/src/one_op_tests/Greater.test.cc new file mode 100644 index 0000000..b63075c --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Greater.test.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +struct GreaterVariationParam +{ + TestCaseData tcd; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + const std::vector backends = {"acl_cl", "acl_neon", "cpu"}; +}; + +class GreaterVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: +// Base: {1, 2, 2, 1} +// Brodcast: {1} on of two input +// Output shape: {1, 2, 2, 1} +// Input type: Non-quantization type +// Output type: BOOL +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P(GenModelTest, GreaterVariation, + ::testing::Values( + // Float type + GreaterVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({false, true, false, true})}, + // Float type - broadcast + GreaterVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.3}) + .addOutput({false, false, false, true})}, + // Int32 type + GreaterVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, true, false, true}), + circle::TensorType::TensorType_INT32}, + // Int32 type - broadcast + GreaterVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({5}) + .addOutput({false, false, false, true}), + circle::TensorType::TensorType_INT32}, + // Int64 type + // NYI: acl backend + GreaterVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, true, false, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Int64 type - broadcast + // NYI: acl backend + GreaterVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1}) + .addOutput({false, true, false, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}})); + +TEST_P(GreaterVariation, Test) +{ + auto ¶m = GetParam(); + + auto lhs_data = param.tcd.inputs.at(0); + auto rhs_data = param.tcd.inputs.at(1); + + bool broadcast_lhs = false; + bool broadcast_rhs = false; + if 
(lhs_data.size() != rhs_data.size()) + { + if (lhs_data.size() < rhs_data.size()) + broadcast_lhs = true; + else + broadcast_rhs = true; + } + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_BOOL; + + int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int out = cgen.addTensor({{1, 2, 2, 1}, output_type}); + cgen.addOperatorGreater({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backends); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Greater_DifferentType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorGreater({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Greater_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorGreater({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc 
b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc new file mode 100644 index 0000000..f824030 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct GreaterEqualVariationParam +{ + TestCaseData tcd; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + const std::vector backends = {"acl_cl", "acl_neon", "cpu"}; +}; + +class GreaterEqualVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: +// Base: {1, 2, 2, 1} +// Brodcast: {1} on of two input +// Output shape: {1, 2, 2, 1} +// Input type: Non-quantization type +// Output type: BOOL +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P( + GenModelTest, GreaterEqualVariation, + ::testing::Values( + // Float type + GreaterEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({true, true, false, true})}, + // Float type - broadcast + GreaterEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.3}) + .addOutput({false, true, false, true})}, + // Int32 type + GreaterEqualVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({true, true, false, true}), + circle::TensorType::TensorType_INT32}, + 
// Int32 type - broadcast + GreaterEqualVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({5}) + .addOutput({false, false, false, true}), + circle::TensorType::TensorType_INT32}, + // Int64 type + // NYI: acl backend + GreaterEqualVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({true, true, false, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Int64 type - broadcast + // NYI: acl backend + GreaterEqualVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1}) + .addOutput({true, true, false, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}})); + +TEST_P(GreaterEqualVariation, Test) +{ + auto &param = GetParam(); + + auto lhs_data = param.tcd.inputs.at(0); + auto rhs_data = param.tcd.inputs.at(1); + + bool broadcast_lhs = false; + bool broadcast_rhs = false; + if (lhs_data.size() != rhs_data.size()) + { + if (lhs_data.size() < rhs_data.size()) + broadcast_lhs = true; + else + broadcast_rhs = true; + } + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_BOOL; + + int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int rhs = broadcast_rhs ? 
cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int out = cgen.addTensor({{1, 2, 2, 1}, output_type}); + cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backends); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_GreaterEqual_DifferentType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_GreaterEqual_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/If.cc b/tests/nnfw_api/src/one_op_tests/If.cc deleted file mode 100644 index 4ec2942..0000000 --- a/tests/nnfw_api/src/one_op_tests/If.cc +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -#include - -TEST_F(GenModelTest, OneOp_If) -{ - // The model looks just like the below pseudocode - // - // function model(x) - // { - // if (x < 0.0) - // return -100.0; - // else - // return 100.0; - // } - - CircleGen cgen; - - // constant buffers - std::vector comp_data{0.0}; - uint32_t comp_buf = cgen.addBuffer(comp_data); - std::vector then_data{-100}; - uint32_t then_buf = cgen.addBuffer(then_data); - std::vector else_data{100}; - uint32_t else_buf = cgen.addBuffer(else_data); - - // primary subgraph - { - int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf}); - int cond = cgen.addTensor({{1}, circle::TensorType_BOOL}); - cgen.addOperatorLess({{x, comp}, {cond}}); - - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorIf({{cond}, {ret}}, 1, 2); - - cgen.setInputsAndOutputs({x}, {ret}); - } - - // then subgraph - { - cgen.nextSubgraph(); - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf}); - cgen.setInputsAndOutputs({}, {ret}); - } - - // else subgraph - { - cgen.nextSubgraph(); - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf}); - cgen.setInputsAndOutputs({}, {ret}); - } - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{-1.0}}, {{-100.0}})); - 
_context->addTestCase(uniformTCD({{1.0}}, {{100.0}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -class IfWrongSubgraphIndex : public GenModelTest, - public ::testing::WithParamInterface> -{ -}; - -TEST_P(IfWrongSubgraphIndex, neg_Test) -{ - // These values must be less than 0 or greater than 2 - int then_subg = GetParam().first; - int else_subg = GetParam().second; - - // When If operation's subgraph index is invalid - - CircleGen cgen; - - // constant buffers - std::vector then_data{-100}; - uint32_t then_buf = cgen.addBuffer(then_data); - std::vector else_data{100}; - uint32_t else_buf = cgen.addBuffer(else_data); - - // primary subgraph - { - int x = cgen.addTensor({{1}, circle::TensorType_BOOL}); - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg); - - cgen.setInputsAndOutputs({x}, {ret}); - } - - // then subgraph - { - cgen.nextSubgraph(); - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf}); - cgen.setInputsAndOutputs({}, {ret}); - } - - // else subgraph - { - cgen.nextSubgraph(); - int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf}); - cgen.setInputsAndOutputs({}, {ret}); - } - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -INSTANTIATE_TEST_CASE_P(GenModelTest, IfWrongSubgraphIndex, - ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2), - std::make_pair(1, 99), std::make_pair(1, -99), - std::make_pair(-99, 99))); diff --git a/tests/nnfw_api/src/one_op_tests/If.test.cc b/tests/nnfw_api/src/one_op_tests/If.test.cc new file mode 100644 index 0000000..543d879 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/If.test.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +TEST_F(GenModelTest, OneOp_If) +{ + // The model looks just like the below pseudocode + // + // function model(x) + // { + // if (x < 0.0) + // return -100.0; + // else + // return 100.0; + // } + + CircleGen cgen; + + // constant buffers + std::vector comp_data{0.0}; + uint32_t comp_buf = cgen.addBuffer(comp_data); + std::vector then_data{-100}; + uint32_t then_buf = cgen.addBuffer(then_data); + std::vector else_data{100}; + uint32_t else_buf = cgen.addBuffer(else_data); + + // primary subgraph + { + int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf}); + int cond = cgen.addTensor({{1}, circle::TensorType_BOOL}); + cgen.addOperatorLess({{x, comp}, {cond}}); + + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorIf({{cond}, {ret}}, 1, 2); + + cgen.setInputsAndOutputs({x}, {ret}); + } + + // then subgraph + { + cgen.nextSubgraph(); + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf}); + cgen.setInputsAndOutputs({}, {ret}); + } + + // else subgraph + { + cgen.nextSubgraph(); + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf}); + cgen.setInputsAndOutputs({}, {ret}); + } + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{-1.0}}, {{-100.0}})); + 
_context->addTestCase(uniformTCD({{1.0}}, {{100.0}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +class IfWrongSubgraphIndex : public GenModelTest, + public ::testing::WithParamInterface> +{ +}; + +TEST_P(IfWrongSubgraphIndex, neg_Test) +{ + // These values must be less than 0 or greater than 2 + int then_subg = GetParam().first; + int else_subg = GetParam().second; + + // When If operation's subgraph index is invalid + + CircleGen cgen; + + // constant buffers + std::vector then_data{-100}; + uint32_t then_buf = cgen.addBuffer(then_data); + std::vector else_data{100}; + uint32_t else_buf = cgen.addBuffer(else_data); + + // primary subgraph + { + int x = cgen.addTensor({{1}, circle::TensorType_BOOL}); + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg); + + cgen.setInputsAndOutputs({x}, {ret}); + } + + // then subgraph + { + cgen.nextSubgraph(); + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf}); + cgen.setInputsAndOutputs({}, {ret}); + } + + // else subgraph + { + cgen.nextSubgraph(); + int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf}); + cgen.setInputsAndOutputs({}, {ret}); + } + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +INSTANTIATE_TEST_SUITE_P(GenModelTest, IfWrongSubgraphIndex, + ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2), + std::make_pair(1, 99), std::make_pair(1, -99), + std::make_pair(-99, 99))); diff --git a/tests/nnfw_api/src/one_op_tests/InstanceNorm.cc b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/InstanceNorm.cc rename to tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc similarity index 100% rename from 
tests/nnfw_api/src/one_op_tests/L2Normalization.cc rename to tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/LeakyRelu.cc rename to tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Less.test.cc b/tests/nnfw_api/src/one_op_tests/Less.test.cc new file mode 100644 index 0000000..6f76465 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Less.test.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +struct LessVariationParam +{ + TestCaseData tcd; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + const std::vector backends = {"acl_cl", "acl_neon", "cpu"}; +}; + +class LessVariation : public GenModelTest, public ::testing::WithParamInterface +{ +}; + +// Input shape: +// Base: {1, 2, 2, 1} +// Brodcast: {1} on of two input +// Output shape: {1, 2, 2, 1} +// Input type: Non-quantization type +// Output type: BOOL +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P(GenModelTest, LessVariation, + ::testing::Values( + // Float type + LessVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({false, false, true, false})}, + // Float type - broadcast + LessVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.3}) + .addOutput({true, false, true, false})}, + // Int32 type + LessVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, false, true, false}), + circle::TensorType::TensorType_INT32}, + // Int32 type - broadcast + LessVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({5}) + .addOutput({true, true, true, false}), + circle::TensorType::TensorType_INT32}, + // Int64 type + // NYI: acl backend + LessVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, false, true, false}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Int64 type - broadcast + // NYI: acl backend + LessVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1}) + .addOutput({false, false, true, false}), + circle::TensorType::TensorType_INT64, + {"cpu"}})); + +TEST_P(LessVariation, Test) +{ + auto &param = GetParam(); + + auto lhs_data = param.tcd.inputs.at(0); + auto rhs_data = param.tcd.inputs.at(1); + + bool broadcast_lhs = false; + bool broadcast_rhs = false; + if (lhs_data.size() != 
rhs_data.size()) + { + if (lhs_data.size() < rhs_data.size()) + broadcast_lhs = true; + else + broadcast_rhs = true; + } + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_BOOL; + + int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int out = cgen.addTensor({{1, 2, 2, 1}, output_type}); + cgen.addOperatorLess({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backends); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Less_DifferentType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorLess({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Less_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorLess({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc 
b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc new file mode 100644 index 0000000..e0e6d66 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct LessEqualVariationParam +{ + TestCaseData tcd; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + const std::vector backends = {"acl_cl", "acl_neon", "cpu"}; +}; + +class LessEqualVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: +// Base: {1, 2, 2, 1} +// Brodcast: {1} on of two input +// Output shape: {1, 2, 2, 1} +// Input type: Non-quantization type +// Output type: BOOL +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P(GenModelTest, LessEqualVariation, + ::testing::Values( + // Float type + LessEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({true, false, true, false})}, + // Float type - broadcast + LessEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.2, 0.7}) + .addInput({0.3}) + .addOutput({true, true, true, false})}, + // Int32 type + LessEqualVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({true, false, true, false}), + circle::TensorType::TensorType_INT32}, + // Int32 type - broadcast 
+ LessEqualVariationParam{TestCaseData{} + .addInput({1, 3, 2, 7}) + .addInput({5}) + .addOutput({true, true, true, false}), + circle::TensorType::TensorType_INT32}, + // Int64 type + // NYI: acl backend + LessEqualVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({true, false, true, false}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Int64 type - broadcast + // NYI: acl backend + LessEqualVariationParam{TestCaseData{} + .addInput({1, 3, -2, 7}) + .addInput({1}) + .addOutput({true, false, true, false}), + circle::TensorType::TensorType_INT64, + {"cpu"}})); + +TEST_P(LessEqualVariation, Test) +{ + auto &param = GetParam(); + + auto lhs_data = param.tcd.inputs.at(0); + auto rhs_data = param.tcd.inputs.at(1); + + bool broadcast_lhs = false; + bool broadcast_rhs = false; + if (lhs_data.size() != rhs_data.size()) + { + if (lhs_data.size() < rhs_data.size()) + broadcast_lhs = true; + else + broadcast_rhs = true; + } + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_BOOL; + + int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int rhs = broadcast_rhs ? 
cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int out = cgen.addTensor({{1, 2, 2, 1}, output_type}); + cgen.addOperatorLessEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backends); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_LessEqual_DifferentType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorLessEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_LessEqual_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorLessEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/LogSoftmax.cc rename to tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Mean.cc b/tests/nnfw_api/src/one_op_tests/Mean.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Mean.cc rename to 
tests/nnfw_api/src/one_op_tests/Mean.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Mul.cc b/tests/nnfw_api/src/one_op_tests/Mul.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Mul.cc rename to tests/nnfw_api/src/one_op_tests/Mul.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Neg.cc b/tests/nnfw_api/src/one_op_tests/Neg.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Neg.cc rename to tests/nnfw_api/src/one_op_tests/Neg.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc new file mode 100644 index 0000000..6a3fec1 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +struct NotEqualVariationParam +{ + TestCaseData tcd; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + const std::vector backends = {"acl_cl", "acl_neon", "cpu"}; +}; + +class NotEqualVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: +// Base: {1, 2, 2, 1} +// Brodcast: {1} on of two input +// Output shape: {1, 2, 2, 1} +// Input type: Non-quantization type +// Output type: BOOL +// Test with different input type and value +INSTANTIATE_TEST_SUITE_P(GenModelTest, NotEqualVariation, + ::testing::Values( + // Float type + NotEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.5, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({false, true, true, true})}, + // Float type - broadcast + NotEqualVariationParam{TestCaseData{} + .addInput({0.1, 0.3, 0.5, 0.7}) + .addInput({0.3}) + .addOutput({true, false, true, true})}, + // Int32 type + NotEqualVariationParam{TestCaseData{} + .addInput({1, 3, 5, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, true, true, true}), + circle::TensorType::TensorType_INT32}, + // Int32 type - broadcast + NotEqualVariationParam{TestCaseData{} + .addInput({1, 3, 5, 7}) + .addInput({5}) + .addOutput({true, true, false, true}), + circle::TensorType::TensorType_INT32}, + // Int64 type + // NYI: acl backend + NotEqualVariationParam{TestCaseData{} + .addInput({1, 3, 5, 7}) + .addInput({1, 2, 3, 4}) + .addOutput({false, true, true, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Int64 type - broadcast + // NYI: acl backend + NotEqualVariationParam{TestCaseData{} + .addInput({1, 3, 5, 7}) + .addInput({1}) + .addOutput({false, true, true, true}), + circle::TensorType::TensorType_INT64, + {"cpu"}}, + // Bool type + NotEqualVariationParam{TestCaseData{} + .addInput({false, false, true, true}) + .addInput({false, true, false, true}) + .addOutput({false, true, true, false}), + circle::TensorType::TensorType_BOOL}, + 
// Bool type - broadcast + NotEqualVariationParam{TestCaseData{} + .addInput({false, false, true, true}) + .addInput({false}) + .addOutput({false, false, true, true}), + circle::TensorType::TensorType_BOOL} + + )); + +TEST_P(NotEqualVariation, Test) +{ + auto &param = GetParam(); + + auto lhs_data = param.tcd.inputs.at(0); + auto rhs_data = param.tcd.inputs.at(1); + + bool broadcast_lhs = false; + bool broadcast_rhs = false; + if (lhs_data.size() != rhs_data.size()) + { + if (lhs_data.size() < rhs_data.size()) + broadcast_lhs = true; + else + broadcast_rhs = true; + } + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_BOOL; + + int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type}) + : cgen.addTensor({{1, 2, 2, 1}, param.input_type}); + int out = cgen.addTensor({{1, 2, 2, 1}, output_type}); + cgen.addOperatorNotEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backends); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_NotEqual_DifferentType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorNotEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_NotEqual_InvalidType) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int rhs = cgen.addTensor({{1, 2, 2, 1}, 
circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32}); + cgen.addOperatorNotEqual({{lhs, rhs}, {out}}); + cgen.setInputsAndOutputs({lhs, rhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/OneHot.cc b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/OneHot.cc rename to tests/nnfw_api/src/one_op_tests/OneHot.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc deleted file mode 100644 index c376c1c..0000000 --- a/tests/nnfw_api/src/one_op_tests/Pad.cc +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -// Input shape: {1, 2, 2, 1} -// Padding: {0, 0, 1, 1, 1, 1, 0, 0} -// Output shape: {1, 4, 4, 1} -struct PadParam -{ - TestCaseData tcd; - circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; -}; - -class PadVariation : public GenModelTest, public ::testing::WithParamInterface -{ -}; - -// Test with different value type -INSTANTIATE_TEST_CASE_P( - GenModelTest, PadVariation, - ::testing::Values( - // float value - PadParam{uniformTCD({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})}, - // uint8 value - PadParam{ - uniformTCD({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}), - circle::TensorType::TensorType_UINT8, 1.0, 8}, - // int8 value - PadParam{uniformTCD({{-2, -1, 1, 2}}, - {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}), - circle::TensorType::TensorType_INT8, 1.0, -5})); - -TEST_P(PadVariation, Test) -{ - auto &param = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_P(PadVariation, neg_InvalidPadRank) -{ - auto &param = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - std::vector padding_data{1, 1, 1, 1}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4}, 
circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(PadVariation, neg_InvalidPadDim0) -{ - auto &param = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - std::vector padding_data{1, 1, 1, 1}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(PadVariation, neg_InvalidPadDim1) -{ - auto &param = GetParam(); - - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - std::vector padding_data{1, 1, 1, 1}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_P(PadVariation, neg_Type) -{ - auto &param = GetParam(); - - const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8) - ? 
circle::TensorType::TensorType_INT8 - : circle::TensorType::TensorType_UINT8); - - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1); - std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; - uint32_t padding_buf = cgen.addBuffer(padding_data); - int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3); - - cgen.addOperatorPad({{in, padding}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Pad.test.cc b/tests/nnfw_api/src/one_op_tests/Pad.test.cc new file mode 100644 index 0000000..582bd84 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Pad.test.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +// Input shape: {1, 2, 2, 1} +// Padding: {0, 0, 1, 1, 1, 1, 0, 0} +// Output shape: {1, 4, 4, 1} +struct PadParam +{ + TestCaseData tcd; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class PadVariation : public GenModelTest, public ::testing::WithParamInterface +{ +}; + +// Test with different value type +INSTANTIATE_TEST_SUITE_P( + GenModelTest, PadVariation, + ::testing::Values( + // float value + PadParam{uniformTCD({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})}, + // uint8 value + PadParam{ + uniformTCD({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}), + circle::TensorType::TensorType_UINT8, 1.0, 8}, + // int8 value + PadParam{uniformTCD({{-2, -1, 1, 2}}, + {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}), + circle::TensorType::TensorType_INT8, 1.0, -5})); + +TEST_P(PadVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); + + cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + _context = std::make_unique(cgen.finish()); + 
_context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_P(PadVariation, neg_InvalidPadRank) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + std::vector padding_data{1, 1, 1, 1}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); + + cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(PadVariation, neg_InvalidPadDim0) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + std::vector padding_data{1, 1, 1, 1}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); + + cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(PadVariation, neg_InvalidPadDim1) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + std::vector padding_data{1, 1, 1, 1}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); + + 
cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_P(PadVariation, neg_Type) +{ + auto ¶m = GetParam(); + + const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8) + ? circle::TensorType::TensorType_INT8 + : circle::TensorType::TensorType_UINT8); + + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0); + + cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1); + std::vector padding_data{0, 0, 1, 1, 1, 1, 0, 0}; + uint32_t padding_buf = cgen.addBuffer(padding_data); + int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); + int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3); + + cgen.addOperatorPad({{in, padding}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/PadV2.cc rename to tests/nnfw_api/src/one_op_tests/PadV2.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Quantize.cc 
b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Quantize.cc rename to tests/nnfw_api/src/one_op_tests/Quantize.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Rank.cc rename to tests/nnfw_api/src/one_op_tests/Rank.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.cc b/tests/nnfw_api/src/one_op_tests/Reduce.cc deleted file mode 100644 index bdcc5c2..0000000 --- a/tests/nnfw_api/src/one_op_tests/Reduce.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GenModelTest.h" - -#include - -CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims) -{ - CircleGen cgen; - uint32_t axis_buf = cgen.addBuffer(std::vector{0, 1, 2, 3}); - int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}); - int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf}); - int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims); - cgen.setInputsAndOutputs({in}, {out}); - return cgen.finish(); -} - -TEST_F(GenModelTest, OneOp_ReduceMax) -{ - auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false); - _context = std::make_unique(std::move(model)); - _context->addTestCase(uniformTCD({{1, 2, 3, 4, 5, 6}}, {{6}})); - _context->addTestCase(uniformTCD({{100, 98, 55, 200, 3, 40}}, {{200}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -class ReduceMaxBadIndex : public GenModelTest, - public ::testing::WithParamInterface> -{ -}; - -TEST_P(ReduceMaxBadIndex, neg_Test) -{ - CircleGen cgen; - // Axis cannot be equal or bigger than input's rank - 4 - uint32_t axis_buf = cgen.addBuffer(GetParam()); - int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}); - int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf}); - int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailCompile(); - - SUCCEED(); -} - -INSTANTIATE_TEST_CASE_P(GenModelTest, ReduceMaxBadIndex, - ::testing::Values(std::vector{0, 1, 2, 4}, - std::vector{0, -5, 2, 3}, - std::vector{-88, 1, 2, 3}, - std::vector{0, 1, 88, 3})); diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.test.cc 
b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc new file mode 100644 index 0000000..13d180a --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims) +{ + CircleGen cgen; + uint32_t axis_buf = cgen.addBuffer(std::vector{0, 1, 2, 3}); + int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_F(GenModelTest, OneOp_ReduceMax) +{ + auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false); + _context = std::make_unique(std::move(model)); + _context->addTestCase(uniformTCD({{1, 2, 3, 4, 5, 6}}, {{6}})); + _context->addTestCase(uniformTCD({{100, 98, 55, 200, 3, 40}}, {{200}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +class ReduceMaxBadIndex : public GenModelTest, + public ::testing::WithParamInterface> +{ +}; + +TEST_P(ReduceMaxBadIndex, neg_Test) +{ + CircleGen cgen; + // Axis cannot be equal or bigger than input's 
rank - 4 + uint32_t axis_buf = cgen.addBuffer(GetParam()); + int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailCompile(); + + SUCCEED(); +} + +INSTANTIATE_TEST_SUITE_P(GenModelTest, ReduceMaxBadIndex, + ::testing::Values(std::vector{0, 1, 2, 4}, + std::vector{0, -5, 2, 3}, + std::vector{-88, 1, 2, 3}, + std::vector{0, 1, 88, 3})); diff --git a/tests/nnfw_api/src/one_op_tests/Relu.cc b/tests/nnfw_api/src/one_op_tests/Relu.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Relu.cc rename to tests/nnfw_api/src/one_op_tests/Relu.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Relu6.cc b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Relu6.cc rename to tests/nnfw_api/src/one_op_tests/Relu6.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc deleted file mode 100644 index 5db08f1..0000000 --- a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -#include - -struct ResizeBilinearParam -{ - TestCaseData tcd; - circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; -}; - -class ResizeBilinearVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -TEST_P(ResizeBilinearVariation, Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - std::vector size_data{3, 3}; - uint32_t size_buf = cgen.addBuffer(size_data); - int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point); - cgen.addOperatorResizeBilinear({{in, size}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -INSTANTIATE_TEST_CASE_P( - GenModelTest, ResizeBilinearVariation, - ::testing::Values( - // float value - ResizeBilinearParam{uniformTCD({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, - 1.666666667, 2, 2, 2}})}, - // uint8 value - ResizeBilinearParam{uniformTCD({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}), - circle::TensorType::TensorType_UINT8, 1.0, 0}, - // int8 value - ResizeBilinearParam{uniformTCD({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}), - circle::TensorType::TensorType_INT8, 1.0, 0})); - -TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar) -{ - CircleGen cgen; - int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - 
cgen.addOperatorResizeBilinear({{in, size}, {out}}); - cgen.setInputsAndOutputs({in, size}, {out}); - - _context = std::make_unique(cgen.finish()); - // FIXME enable a test case the below is not a valid test case - //_context->addTestCase(TestCaseData{}.addInput({3, 3}).addInput({1, 1, 2, - // 2}).addOutput({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal) -{ - CircleGen cgen; - std::vector size_data{-3, 3}; - uint32_t size_buf = cgen.addBuffer(size_data); - int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorResizeBilinear({{in, size}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc new file mode 100644 index 0000000..fe313d4 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include + +struct ResizeBilinearParam +{ + TestCaseData tcd; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class ResizeBilinearVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +TEST_P(ResizeBilinearVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + std::vector size_data{3, 3}; + uint32_t size_buf = cgen.addBuffer(size_data); + int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point); + cgen.addOperatorResizeBilinear({{in, size}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +INSTANTIATE_TEST_SUITE_P( + GenModelTest, ResizeBilinearVariation, + ::testing::Values( + // float value + ResizeBilinearParam{uniformTCD({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, + 1.666666667, 2, 2, 2}})}, + // uint8 value + ResizeBilinearParam{uniformTCD({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}), + circle::TensorType::TensorType_UINT8, 1.0, 0}, + // int8 value + ResizeBilinearParam{uniformTCD({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}), + circle::TensorType::TensorType_INT8, 1.0, 0})); + +TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar) +{ + CircleGen cgen; + int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorResizeBilinear({{in, size}, {out}}); + cgen.setInputsAndOutputs({in, size}, {out}); + + _context = 
std::make_unique(cgen.finish()); + // FIXME enable a test case the below is not a valid test case + //_context->addTestCase(TestCaseData{}.addInput({3, 3}).addInput({1, 1, 2, + // 2}).addOutput({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal) +{ + CircleGen cgen; + std::vector size_data{-3, 3}; + uint32_t size_buf = cgen.addBuffer(size_data); + int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorResizeBilinear({{in, size}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc rename to tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Reverse.cc b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Reverse.cc rename to tests/nnfw_api/src/one_op_tests/Reverse.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Select.cc b/tests/nnfw_api/src/one_op_tests/Select.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Select.cc rename to tests/nnfw_api/src/one_op_tests/Select.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Shape.cc b/tests/nnfw_api/src/one_op_tests/Shape.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Shape.cc rename to tests/nnfw_api/src/one_op_tests/Shape.test.cc diff --git 
a/tests/nnfw_api/src/one_op_tests/Slice.cc b/tests/nnfw_api/src/one_op_tests/Slice.cc deleted file mode 100644 index 002fb01..0000000 --- a/tests/nnfw_api/src/one_op_tests/Slice.cc +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -struct SliceVariationParam -{ - std::vector input_shape; - std::vector begins; - std::vector sizes; - TestCaseData tcd; - - circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; - float scale = 0.0f; - int64_t zero_point = 0; - circle::TensorType begins_type = circle::TensorType::TensorType_INT32; -}; - -class SliceVariation : public GenModelTest, - public ::testing::WithParamInterface -{ -}; - -INSTANTIATE_TEST_CASE_P( - GenModelTest, SliceVariation, - ::testing::Values( - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})}, - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), - circle::TensorType::TensorType_UINT8, - 1, - 0}, - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), - circle::TensorType::TensorType_FLOAT32, - 0, - 0, - circle::TensorType::TensorType_INT64})); - -TEST_P(SliceVariation, 
Test) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point); - if (param.begins_type == circle::TensorType::TensorType_INT32) - { - uint32_t begins_buf = cgen.addBuffer(param.begins); - int rank = param.begins.size(); - int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); - - uint32_t sizes_buf = cgen.addBuffer(param.sizes); - int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf}); - - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); - } - else if (param.begins_type == circle::TensorType::TensorType_INT64) - { - std::vector begins_64(param.begins.size()); - std::vector sizes_64(param.sizes.size()); - for (int i = 0; i < param.begins.size(); i++) - { - begins_64[i] = param.begins[i]; - sizes_64[i] = param.sizes[i]; - } - - uint32_t begins_buf = cgen.addBuffer(begins_64); - int rank = param.begins.size(); - int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); - - uint32_t sizes_buf = cgen.addBuffer(sizes_64); - int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf}); - - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); - } - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - - // acl don't support int64 yet - if (param.begins_type == circle::TensorType::TensorType_INT64) - { - _context->setBackends({"cpu"}); - } - else - { - _context->setBackends({"cpu", "acl_cl", "acl_neon"}); - } - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Slice_Type) -{ - CircleGen cgen; - int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); - std::vector begins_data = {0, 0, 1, 0}; - uint32_t begins_buf = cgen.addBuffer(begins_data); - int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf}); - std::vector 
sizes_data = {1, 2, 1, 1}; - uint32_t sizes_buf = cgen.addBuffer(sizes_data); - int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf}); - int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -TEST_P(SliceVariation, neg_DiffType) -{ - auto ¶m = GetParam(); - - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point); - if (param.begins_type == circle::TensorType::TensorType_INT32) - { - uint32_t begins_buf = cgen.addBuffer(param.begins); - std::vector sizes_64(param.sizes.size()); - for (int i = 0; i < param.begins.size(); i++) - { - sizes_64[i] = param.sizes[i]; - } - - int rank = param.begins.size(); - int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); - - uint32_t sizes_buf = cgen.addBuffer(sizes_64); - int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf}); - - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); - } - else if (param.begins_type == circle::TensorType::TensorType_INT64) - { - std::vector begins_64(param.begins.size()); - for (int i = 0; i < param.begins.size(); i++) - { - begins_64[i] = param.begins[i]; - } - - uint32_t begins_buf = cgen.addBuffer(begins_64); - int rank = param.begins.size(); - int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); - - uint32_t sizes_buf = cgen.addBuffer(param.sizes); - int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf}); - - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); - } - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - 
SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Slice.test.cc b/tests/nnfw_api/src/one_op_tests/Slice.test.cc new file mode 100644 index 0000000..8cd9d70 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Slice.test.cc @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct SliceVariationParam +{ + std::vector input_shape; + std::vector begins; + std::vector sizes; + TestCaseData tcd; + + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; + circle::TensorType begins_type = circle::TensorType::TensorType_INT32; +}; + +class SliceVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +INSTANTIATE_TEST_SUITE_P( + GenModelTest, SliceVariation, + ::testing::Values( + SliceVariationParam{ + {2, 2, 3, 1}, + {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})}, + SliceVariationParam{ + {2, 2, 3, 1}, + {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), + circle::TensorType::TensorType_UINT8, + 1, + 0}, + SliceVariationParam{ + {2, 2, 3, 1}, + {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), + circle::TensorType::TensorType_FLOAT32, + 0, + 0, + 
circle::TensorType::TensorType_INT64})); + +TEST_P(SliceVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point); + if (param.begins_type == circle::TensorType::TensorType_INT32) + { + uint32_t begins_buf = cgen.addBuffer(param.begins); + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(param.sizes); + int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } + else if (param.begins_type == circle::TensorType::TensorType_INT64) + { + std::vector begins_64(param.begins.size()); + std::vector sizes_64(param.sizes.size()); + for (int i = 0; i < param.begins.size(); i++) + { + begins_64[i] = param.begins[i]; + sizes_64[i] = param.sizes[i]; + } + + uint32_t begins_buf = cgen.addBuffer(begins_64); + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(sizes_64); + int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + + // acl don't support int64 yet + if (param.begins_type == circle::TensorType::TensorType_INT64) + { + _context->setBackends({"cpu"}); + } + else + { + _context->setBackends({"cpu", "acl_cl", "acl_neon"}); + } + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Slice_Type) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + std::vector begins_data = {0, 0, 1, 0}; + uint32_t begins_buf = cgen.addBuffer(begins_data); + int begins = cgen.addTensor({{4}, 
circle::TensorType::TensorType_FLOAT32, begins_buf}); + std::vector sizes_data = {1, 2, 1, 1}; + uint32_t sizes_buf = cgen.addBuffer(sizes_data); + int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf}); + int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_P(SliceVariation, neg_DiffType) +{ + auto &param = GetParam(); + + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point); + if (param.begins_type == circle::TensorType::TensorType_INT32) + { + uint32_t begins_buf = cgen.addBuffer(param.begins); + std::vector sizes_64(param.sizes.size()); + for (int i = 0; i < param.begins.size(); i++) + { + sizes_64[i] = param.sizes[i]; + } + + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(sizes_64); + int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } + else if (param.begins_type == circle::TensorType::TensorType_INT64) + { + std::vector begins_64(param.begins.size()); + for (int i = 0; i < param.begins.size(); i++) + { + begins_64[i] = param.begins[i]; + } + + uint32_t begins_buf = cgen.addBuffer(begins_64); + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(param.sizes); + int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } + cgen.setInputsAndOutputs({in}, {out}); + + _context = 
std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.cc b/tests/nnfw_api/src/one_op_tests/Softmax.cc deleted file mode 100644 index aba4e89..0000000 --- a/tests/nnfw_api/src/one_op_tests/Softmax.cc +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -// beta = 0.1 -// input/output shape: {1, 2, 1, 4} -struct SoftmaxParam -{ - TestCaseData tcd; - circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; - float input_scale = 0.0f; - int64_t input_zero_point = 0; -}; - -class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface -{ -}; - -// Test with different value type -INSTANTIATE_TEST_CASE_P( - GenModelTest, SoftmaxVariation, - ::testing::Values( - // float value - SoftmaxParam{ - uniformTCD({{0, -6, 2, 4, 3, -2, 10, 1}}, - {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})}, - // uint8 value - SoftmaxParam{ - uniformTCD({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}), - circle::TensorType::TensorType_UINT8, 1.0, 10}, - // int8 value - SoftmaxParam{ - uniformTCD({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}), - circle::TensorType::TensorType_INT8, 1.0, 0})); - -TEST_P(SoftmaxVariation, Test) -{ - auto &param = GetParam(); - - 
CircleGen cgen; - - // NNAPI spec and tflite test use fixed output scale and zero-point - float out_scale = 0.0; - int64_t out_zero_point = 0; - if (param.data_type == circle::TensorType::TensorType_UINT8) - { - out_scale = 1.0f / 256; - } - else if (param.data_type == circle::TensorType::TensorType_INT8) - { - out_scale = 1.0f / 256; - out_zero_point = -128; - } - - int input = - cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point); - int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point); - cgen.addOperatorSoftmax({{input}, {out}}, 0.1); - cgen.setInputsAndOutputs({input}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"cpu", "acl_neon", "acl_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta) -{ - CircleGen cgen; - int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorSoftmax({{input}, {out}}, 0.1); - cgen.setInputsAndOutputs({input}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}})); - _context->setBackends({"gpu_cl"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_Softmax) -{ - CircleGen cgen; - int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0); - cgen.setInputsAndOutputs({lhs}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{-1., 0., 1., 1.}}, - {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}})); - _context->setBackends({"acl_cl", "cpu", "gpu_cl"}); - - SUCCEED(); -} - -TEST_P(SoftmaxVariation, 
neg_Type) -{ - auto &param = GetParam(); - - CircleGen cgen; - int input = - cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point); - int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL}); - cgen.addOperatorSoftmax({{input}, {out}}, 0.1); - cgen.setInputsAndOutputs({input}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->expectFailModelLoad(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.test.cc b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc new file mode 100644 index 0000000..1782baf --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +// beta = 0.1 +// input/output shape: {1, 2, 1, 4} +struct SoftmaxParam +{ + TestCaseData tcd; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; + float input_scale = 0.0f; + int64_t input_zero_point = 0; +}; + +class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface +{ +}; + +// Test with different value type +INSTANTIATE_TEST_SUITE_P( + GenModelTest, SoftmaxVariation, + ::testing::Values( + // float value + SoftmaxParam{ + uniformTCD({{0, -6, 2, 4, 3, -2, 10, 1}}, + {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})}, + // uint8 value + SoftmaxParam{ + uniformTCD({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}), + circle::TensorType::TensorType_UINT8, 1.0, 10}, + // int8 value + SoftmaxParam{ + uniformTCD({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}), + circle::TensorType::TensorType_INT8, 1.0, 0})); + +TEST_P(SoftmaxVariation, Test) +{ + auto &param = GetParam(); + + CircleGen cgen; + + // NNAPI spec and tflite test use fixed output scale and zero-point + float out_scale = 0.0; + int64_t out_zero_point = 0; + if (param.data_type == circle::TensorType::TensorType_UINT8) + { + out_scale = 1.0f / 256; + } + else if (param.data_type == circle::TensorType::TensorType_INT8) + { + out_scale = 1.0f / 256; + out_zero_point = -128; + } + + int input = + cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point); + int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point); + cgen.addOperatorSoftmax({{input}, {out}}, 0.1); + cgen.setInputsAndOutputs({input}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"cpu", "acl_neon", "acl_cl"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta) +{ + CircleGen cgen; + int input = cgen.addTensor({{4, 1, 1, 1}, 
circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorSoftmax({{input}, {out}}, 0.1); + cgen.setInputsAndOutputs({input}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}})); + _context->setBackends({"gpu_cl"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Softmax) +{ + CircleGen cgen; + int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0); + cgen.setInputsAndOutputs({lhs}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{-1., 0., 1., 1.}}, + {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}})); + _context->setBackends({"acl_cl", "cpu", "gpu_cl"}); + + SUCCEED(); +} + +TEST_P(SoftmaxVariation, neg_Type) +{ + auto &param = GetParam(); + + CircleGen cgen; + int input = + cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point); + int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorSoftmax({{input}, {out}}, 0.1); + cgen.setInputsAndOutputs({input}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Split.cc b/tests/nnfw_api/src/one_op_tests/Split.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Split.cc rename to tests/nnfw_api/src/one_op_tests/Split.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Sqrt.cc b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Sqrt.cc rename to tests/nnfw_api/src/one_op_tests/Sqrt.test.cc diff --git 
a/tests/nnfw_api/src/one_op_tests/Square.cc b/tests/nnfw_api/src/one_op_tests/Square.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Square.cc rename to tests/nnfw_api/src/one_op_tests/Square.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/StridedSlice.cc b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/StridedSlice.cc rename to tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Sub.cc b/tests/nnfw_api/src/one_op_tests/Sub.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Sub.cc rename to tests/nnfw_api/src/one_op_tests/Sub.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Tile.cc b/tests/nnfw_api/src/one_op_tests/Tile.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Tile.cc rename to tests/nnfw_api/src/one_op_tests/Tile.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/Transpose.cc b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc similarity index 100% rename from tests/nnfw_api/src/one_op_tests/Transpose.cc rename to tests/nnfw_api/src/one_op_tests/Transpose.test.cc diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc deleted file mode 100644 index ee0a9df..0000000 --- a/tests/nnfw_api/src/one_op_tests/While.cc +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" -#include "WhileTestModel.h" - -#include - -TEST_F(GenModelTest, OneOp_While) -{ - WhileModelLoop10 model; - _context = std::make_unique(std::move(model.cbuf)); - _context->addTestCase(uniformTCD({{0}}, {{100}})); - _context->addTestCase(uniformTCD({{2}}, {{102}})); - _context->addTestCase(uniformTCD({{22}}, {{102}})); - _context->addTestCase(uniformTCD({{100}}, {{100}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_While_github_4783) -{ - // The model looks just like the below pseudocode - // - // function model(x, data) - // { - // // `data` does not do anything but passed to while's cond and body subgraphs - // // to measure copy overhead between subgraphs - // while (x < 100.0) - // { - // x = x + 1.0; - // } - // return (x, data) - // } - - const int kElems = 4; - const std::vector shape{kElems}; - - CircleGen cgen; - uint32_t incr_buf = cgen.addBuffer(std::vector{1}); - uint32_t incr_data_buf = cgen.addBuffer(std::vector(kElems, 1)); - uint32_t end_buf = cgen.addBuffer(std::vector{100}); - - // primary subgraph - { - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32}); - cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2); - cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out}); - } - - // cond subgraph - { - cgen.nextSubgraph(); - int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int d = cgen.addTensor({shape, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf}); - int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); - cgen.addOperatorLess({{x, end}, {result}}); - cgen.setInputsAndOutputs({x, d}, 
{result}); - } - - // body subgraph - { - cgen.nextSubgraph(); - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32}); - int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf}); - int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32}); - cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); - cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out}); - } - - _context = std::make_unique(cgen.finish()); - std::vector tc_data_in(kElems, 9); - std::vector tc_data_out(kElems, 109); - _context->addTestCase(uniformTCD({{0}, tc_data_in}, {{100}, tc_data_out})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_While_TwoInputs) -{ - // The model looks just like the below pseudocode - // - // function model(x, end) - // { - // while (x < end) - // { - // x = x + 10.0 - // } - // return x - // } - - CircleGen cgen; - std::vector incr_data{10}; - uint32_t incr_buf = cgen.addBuffer(incr_data); - - // primary subgraph - { - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2); - cgen.setInputsAndOutputs({x_in, end_in}, {x_out}); - } - - // cond subgraph - { - cgen.nextSubgraph(); - int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); - cgen.addOperatorLess({{x, end}, {result}}); - 
cgen.setInputsAndOutputs({x, end}, {result}); - } - - // body subgraph - { - cgen.nextSubgraph(); - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({x_in, end}, {x_out, end}); - } - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{0}, {20}}, {{20}})); - _context->addTestCase(uniformTCD({{5}, {30}}, {{35}})); - _context->addTestCase(uniformTCD({{20}, {10}}, {{20}})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -class WhileWrongSubgraphIndex : public GenModelTest, - public ::testing::WithParamInterface> -{ -}; - -TEST_P(WhileWrongSubgraphIndex, neg_Test) -{ - // These values must be less than 0 or greater than 2 - int cond_subg = GetParam().first; - int body_subg = GetParam().second; - - // When While operation's subgraph index is invalid - - CircleGen cgen; - - // constant buffers - std::vector incr_data{10}; - uint32_t incr_buf = cgen.addBuffer(incr_data); - - // primary subgraph - { - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg); - cgen.setInputsAndOutputs({x_in, end_in}, {x_out}); - } - - // cond subgraph - { - cgen.nextSubgraph(); - int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); - cgen.addOperatorLess({{x, end}, {result}}); - cgen.setInputsAndOutputs({x, end}, {result}); 
- } - - // body subgraph - { - cgen.nextSubgraph(); - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({x_in, end}, {x_out, end}); - } - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); - _context->expectFailModelLoad(); - - SUCCEED(); -} - -INSTANTIATE_TEST_CASE_P(GenModelTest, WhileWrongSubgraphIndex, - ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2), - std::make_pair(1, 99), std::make_pair(1, -99), - std::make_pair(-99, 99))); - -// In this test, output of WHILE and body subgraph have different data types -TEST_F(GenModelTest, neg_while_wrong_dtype) -{ - CircleGen cgen; - std::vector incr_data{10}; - uint32_t incr_buf = cgen.addBuffer(incr_data); - std::vector end_data{100}; - uint32_t end_buf = cgen.addBuffer(end_data); - - // primary subgraph - { - int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - - cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2); - cgen.setInputsAndOutputs({model_in}, {model_out}); - } - - // cond subgraph - { - cgen.nextSubgraph(); - int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf}); - int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); - cgen.addOperatorLess({{x, end}, {result}}); - cgen.setInputsAndOutputs({x}, {result}); - } - - // body subgraph - { - cgen.nextSubgraph(); - int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); - int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); - int cast_out = 
cgen.addTensor({{1}, circle::TensorType_INT32}); - cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); - cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32, - circle::TensorType_INT32); - cgen.setInputsAndOutputs({x_in}, {cast_out}); - // output of this subgraph is INT32 but output of WHILE is FLOAT32 - } - - _context = std::make_unique(cgen.finish()); - auto tc = uniformTCD({{0}}, {{100}}); - tc.expectFailRun(); - _context->addTestCase(tc); - _context->setBackends({"cpu"}); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/While.test.cc b/tests/nnfw_api/src/one_op_tests/While.test.cc new file mode 100644 index 0000000..5c4da55 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/While.test.cc @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" +#include "WhileTestModel.h" + +#include + +TEST_F(GenModelTest, OneOp_While) +{ + WhileModelLoop10 model; + _context = std::make_unique(std::move(model.cbuf)); + _context->addTestCase(uniformTCD({{0}}, {{100}})); + _context->addTestCase(uniformTCD({{2}}, {{102}})); + _context->addTestCase(uniformTCD({{22}}, {{102}})); + _context->addTestCase(uniformTCD({{100}}, {{100}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_While_github_4783) +{ + // The model looks just like the below pseudocode + // + // function model(x, data) + // { + // // `data` does not do anything but passed to while's cond and body subgraphs + // // to measure copy overhead between subgraphs + // while (x < 100.0) + // { + // x = x + 1.0; + // } + // return (x, data) + // } + + const int kElems = 4; + const std::vector shape{kElems}; + + CircleGen cgen; + uint32_t incr_buf = cgen.addBuffer(std::vector{1}); + uint32_t incr_data_buf = cgen.addBuffer(std::vector(kElems, 1)); + uint32_t end_buf = cgen.addBuffer(std::vector{100}); + + // primary subgraph + { + int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32}); + cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2); + cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out}); + } + + // cond subgraph + { + cgen.nextSubgraph(); + int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int d = cgen.addTensor({shape, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf}); + int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); + cgen.addOperatorLess({{x, end}, {result}}); + cgen.setInputsAndOutputs({x, d}, {result}); + } + + // body subgraph + { + cgen.nextSubgraph(); + int x_in = cgen.addTensor({{1}, 
circle::TensorType_FLOAT32}); + int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32}); + int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf}); + int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32}); + cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); + cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out}); + } + + _context = std::make_unique(cgen.finish()); + std::vector tc_data_in(kElems, 9); + std::vector tc_data_out(kElems, 109); + _context->addTestCase(uniformTCD({{0}, tc_data_in}, {{100}, tc_data_out})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_While_TwoInputs) +{ + // The model looks just like the below pseudocode + // + // function model(x, end) + // { + // while (x < end) + // { + // x = x + 10.0 + // } + // return x + // } + + CircleGen cgen; + std::vector incr_data{10}; + uint32_t incr_buf = cgen.addBuffer(incr_data); + + // primary subgraph + { + int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2); + cgen.setInputsAndOutputs({x_in, end_in}, {x_out}); + } + + // cond subgraph + { + cgen.nextSubgraph(); + int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); + cgen.addOperatorLess({{x, end}, {result}}); + cgen.setInputsAndOutputs({x, end}, {result}); + } + + // body subgraph + { + cgen.nextSubgraph(); + int x_in = 
cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({x_in, end}, {x_out, end}); + } + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD({{0}, {20}}, {{20}})); + _context->addTestCase(uniformTCD({{5}, {30}}, {{35}})); + _context->addTestCase(uniformTCD({{20}, {10}}, {{20}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +class WhileWrongSubgraphIndex : public GenModelTest, + public ::testing::WithParamInterface> +{ +}; + +TEST_P(WhileWrongSubgraphIndex, neg_Test) +{ + // These values must be less than 0 or greater than 2 + int cond_subg = GetParam().first; + int body_subg = GetParam().second; + + // When While operation's subgraph index is invalid + + CircleGen cgen; + + // constant buffers + std::vector incr_data{10}; + uint32_t incr_buf = cgen.addBuffer(incr_data); + + // primary subgraph + { + int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg); + cgen.setInputsAndOutputs({x_in, end_in}, {x_out}); + } + + // cond subgraph + { + cgen.nextSubgraph(); + int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); + cgen.addOperatorLess({{x, end}, {result}}); + cgen.setInputsAndOutputs({x, end}, {result}); + } + + // body subgraph + { + cgen.nextSubgraph(); + int x_in = cgen.addTensor({{1}, 
circle::TensorType_FLOAT32}); + int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({x_in, end}, {x_out, end}); + } + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +INSTANTIATE_TEST_SUITE_P(GenModelTest, WhileWrongSubgraphIndex, + ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2), + std::make_pair(1, 99), std::make_pair(1, -99), + std::make_pair(-99, 99))); + +// In this test, output of WHILE and body subgraph have different data types +TEST_F(GenModelTest, neg_while_wrong_dtype) +{ + CircleGen cgen; + std::vector incr_data{10}; + uint32_t incr_buf = cgen.addBuffer(incr_data); + std::vector end_data{100}; + uint32_t end_buf = cgen.addBuffer(end_data); + + // primary subgraph + { + int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + + cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2); + cgen.setInputsAndOutputs({model_in}, {model_out}); + } + + // cond subgraph + { + cgen.nextSubgraph(); + int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf}); + int result = cgen.addTensor({{1}, circle::TensorType_BOOL}); + cgen.addOperatorLess({{x, end}, {result}}); + cgen.setInputsAndOutputs({x}, {result}); + } + + // body subgraph + { + cgen.nextSubgraph(); + int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf}); + int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32}); + int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32}); + cgen.addOperatorAdd({{x_in, incr}, 
{x_out}}, circle::ActivationFunctionType_NONE); + cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32, + circle::TensorType_INT32); + cgen.setInputsAndOutputs({x_in}, {cast_out}); + // output of this subgraph is INT32 but output of WHILE is FLOAT32 + } + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + // It is correct to call `_context->expectFailModelLoad();`, but OperationValidator does not deal + // with subgraphs. So it is verified by `_context->expectFailCompile(); as a workaround` + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/scripts/command/nnpkg-test b/tests/scripts/command/nnpkg-test index a1176d1..ba71217 100644 --- a/tests/scripts/command/nnpkg-test +++ b/tests/scripts/command/nnpkg-test @@ -12,6 +12,7 @@ outdir="." nnpkg_run=${nnpkg_run:-"nnpackage_run"} difftool=${difftool:-"h5diff"} delete_dumped_on_failure=0 +verbose_diff=0 usage() { echo "Usage: $0 $progname [options] nnpackage_test" @@ -27,6 +28,7 @@ usage() { echo " -o set output directory (default=$outdir)" echo " -d delete dumped file on failure." echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)" + echo " -v verbose result diff (default=$verbose_diff)" echo "" echo "Environment variables:" echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)" @@ -43,12 +45,13 @@ if [ $# -eq 0 ]; then exit 1 fi -while getopts "hdi:o:" OPTION; do +while getopts "hdi:o:v" OPTION; do case "${OPTION}" in h) usage;; d) delete_dumped_on_failure=1;; i) indir=$OPTARG;; o) outdir=$OPTARG;; + v) verbose_diff=1;; ?) 
exit 1;; esac done @@ -110,8 +113,8 @@ echo -n "[Compare] $nnpkg " test_fail() { echo -e "\tFail" - [ $delete_dumped_on_failure ] && rm "$dumped" - cat "$dumped.log" + [ $delete_dumped_on_failure -eq 1 ] && rm "$dumped" + [ $verbose_diff -eq 1 ] && cat "$dumped.log" rm "$dumped.log" exit 3 } @@ -119,7 +122,7 @@ test_fail() test_pass() { echo -e "\tPass" - cat "$dumped.log" + [ $verbose_diff -eq 1 ] && cat "$dumped.log" rm "$dumped" "$dumped.log" } diff --git a/tests/scripts/command/prepare-model b/tests/scripts/command/prepare-model index 5b33408..7c65254 100644 --- a/tests/scripts/command/prepare-model +++ b/tests/scripts/command/prepare-model @@ -24,8 +24,8 @@ function Usage() echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]" echo "" echo "Options:" - echo " --ignoremd5 Ignore MD5 check when download model files" - echo " --model=(all|nnpackage|tflite) Download test model (deprecated option: always all)" + echo " --ignoremd5 Ignore MD5 check when download model files" + echo " -h, --help Display this help message and exit" } for i in "$@" @@ -38,9 +38,6 @@ do --ignoremd5) MD5_CHECK="off" ;; - --model=*) - # deprecated - ;; *) echo "Unknown option: $i" exit 1 @@ -49,9 +46,10 @@ do shift done -# Default download server url +# Check MODELFILE_SERVER if [[ -z "$MODELFILE_SERVER" ]]; then - export MODELFILE_SERVER="http://npu.mooo.com/archive/tflite_test_model/" + echo "Fail to download models: Please set MODELFILE_SERVER to download model" + exit 1 fi echo "Download from $MODELFILE_SERVER" diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc index 71d8b59..7a58053 100644 --- a/tests/tools/nnpackage_run/src/nnpackage_run.cc +++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc @@ -29,6 +29,7 @@ #include "ruy/profiler/profiler.h" #endif +#include #include #include #include @@ -313,6 +314,11 @@ int main(const int argc, char **argv) return 0; } + catch (boost::program_options::error &e) + { + std::cerr << "E: 
" << e.what() << std::endl; + exit(-1); + } catch (std::runtime_error &e) { std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl; diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc index f90018e..e4b9774 100644 --- a/tests/tools/nnpackage_run/src/rawformatter.cc +++ b/tests/tools/nnpackage_run/src/rawformatter.cc @@ -29,14 +29,13 @@ void RawFormatter::loadInputs(const std::string &filename, std::vectorUseNNAPI(use_nnapi); + if (use_nnapi) + { + // TFLite NNAPI is not worked yet + interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate()); + } interpreter->AllocateTensors(); }); } diff --git a/tools/cross/arm/sources.list.jammy b/tools/cross/arm/sources.list.jammy new file mode 100644 index 0000000..6bb0453 --- /dev/null +++ b/tools/cross/arm/sources.list.jammy @@ -0,0 +1,11 @@ +deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe +deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe + +deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe +deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe + +deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted +deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted + +deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse +deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse diff --git a/tools/cross/arm/sources.list.xenial b/tools/cross/arm/sources.list.xenial deleted file mode 100644 index 56fbb36..0000000 --- a/tools/cross/arm/sources.list.xenial +++ /dev/null @@ -1,11 +0,0 @@ -deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe -deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe - -deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe -deb-src 
http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe - -deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted -deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted - -deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse -deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh index f03d523..16f3a5c 100755 --- a/tools/cross/install_rootfs.sh +++ b/tools/cross/install_rootfs.sh @@ -3,7 +3,7 @@ usage() { echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]" echo "BuildArch can be: arm(default), aarch64 and armel" - echo "LinuxCodeName - optional, Code name for Linux, can be: xenial, bionic(default), focal" + echo "LinuxCodeName - optional, Code name for Linux, can be: bionic(default), focal, jammy" echo " If BuildArch is armel, this can be tizen(default)" echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber" echo " default no proxy. 
Example: --setproxy=127.1.2.3:8080" @@ -22,12 +22,15 @@ __SkipUnmount=0 __IsProxySet=0 __Apt="" # base development support +# install cmake to find cmake package configuration for target file system __UbuntuPackages="build-essential" +__UbuntuPackages+=" cmake" # other development supports __UbuntuPackages+=" ocl-icd-opencl-dev" __UbuntuPackages+=" libhdf5-dev" __UbuntuPackages+=" libboost-all-dev" +__UbuntuPackages+=" libglib2.0-dev" # symlinks fixer __UbuntuPackages+=" symlinks" @@ -67,15 +70,15 @@ for i in "$@" ; do __UbuntuRepo= __LinuxCodeName= ;; - xenial) - __LinuxCodeName=xenial - ;; bionic) __LinuxCodeName=bionic ;; focal) __LinuxCodeName=focal ;; + jammy) + __LinuxCodeName=jammy + ;; --setproxy*) proxyip="${i#*=}" __Apt="Acquire::http::proxy \"http://$proxyip/\";\n" diff --git a/tools/nnpackage_tool/gen_golden/gen_golden.py b/tools/nnpackage_tool/gen_golden/gen_golden.py index 79c86e6..d555419 100755 --- a/tools/nnpackage_tool/gen_golden/gen_golden.py +++ b/tools/nnpackage_tool/gen_golden/gen_golden.py @@ -96,7 +96,7 @@ if __name__ == '__main__': np.random.randint(-127, 127, this_shape).astype(np.int8)) elif this_dtype == tf.float32: input_values.append( - np.random.random_sample(this_shape).astype(np.float32)) + (10 * np.random.random_sample(this_shape) - 5).astype(np.float32)) elif this_dtype == tf.bool: # generate random integer from [0, 2) input_values.append( @@ -142,7 +142,7 @@ if __name__ == '__main__': np.random.randint(-127, 127, this_shape).astype(np.int8)) elif this_dtype == np.float32: input_values.append( - np.random.random_sample(this_shape).astype(np.float32)) + (10 * np.random.random_sample(this_shape) - 5).astype(np.float32)) elif this_dtype == np.bool_: # generate random integer from [0, 2) input_values.append( diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh index 9374af7..5c7c35b 100755 --- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh +++ 
b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh @@ -5,24 +5,33 @@ set -eu progname=$(basename "${BASH_SOURCE[0]}") outdir="." name="" -config="" -config_src="" +configs_src=() +models_src=() +configs_str="" +models_str="" +types_str="" usage() { - echo "Usage: $progname [options] modelfile" + echo "Usage: $progname [options]" echo "Convert modelfile (tflite, circle or tvn) to nnpackage." echo "" echo "Options:" echo " -h show this help" echo " -o set nnpackage output directory (default=$outdir)" - echo " -p set nnpackage output name (default=[modelfile name])" - echo " -c provide configuration file" + echo " -p set nnpackage output name (default=[1st modelfile name])" + echo " -c provide configuration files" + echo " -m provide model files" + echo "" + echo " (Will be deprecated: if there is one remain parameter, that is model file)" echo "" echo "Examples:" - echo " $progname add.tflite => create nnpackage 'add' in $outdir/" - echo " $progname -o out add.tflite => create nnpackage 'add' in out/" - echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/" - echo " $progname -c add.cfg add.tflite => create nnpackage 'add' with add.cfg" + echo " $progname -m add.tflite => create nnpackage 'add' in $outdir/" + echo " $progname -o out -m add.tflite => create nnpackage 'add' in out/" + echo " $progname -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/" + echo " $progname -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg" + echo " $progname -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/" + echo "" + echo "(Will be deprecated: if there is one remain parameter, that is model file)" exit 1 } @@ -31,58 +40,116 @@ if [ $# -eq 0 ]; then exit 1 fi -while getopts "ho:p:c:" OPTION; do -case "${OPTION}" in +while getopts "ho:p:c:m:" OPTION; do + case "${OPTION}" in h) usage;; o) outdir=$OPTARG;; p) name=$OPTARG;; - c) config_src=$OPTARG;; + c) + 
configs_src=($OPTARG) + until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do + if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then + # Backward compatibility (will be deprecated) + # The last remain parameter is model if there is no option "-m" + models_src=($(eval "echo \${$OPTIND}")) + else + configs_src+=($(eval "echo \${$OPTIND}")) + fi + OPTIND=$((OPTIND + 1)) + done + ;; + m) + models_src=($OPTARG) + until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do + models_src+=($(eval "echo \${$OPTIND}")) + OPTIND=$((OPTIND + 1)) + done + ;; ?) exit 1;; -esac + esac done shift $((OPTIND-1)) -if [ $# -ne 1 ]; then - >&2 echo "error: wrong argument (no argument or too many arguments)." - >&2 echo "For help, type $progname -h" - exit 1 +# Backward compatibility (will be deprecated) +# The last remain parameter is model if there is no option "-m" +if [ $# -eq 1 ] && [ ${#models_src[@]} -eq 0 ]; then + models_src=($1) + shift 1 fi -modelfile=$(basename "$1") - -if [[ "$modelfile" != *.* ]]; then - >&2 echo "error: modelfile does not have extension." - >&2 echo "Please provide extension so that $progname can identify what type of model you use." +if [ $# -ne 0 ]; then + >&2 echo "error: wrong argument (too many arguments)." + >&2 echo "For help, type $progname -h" exit 1 fi -if [ ! -e $1 ]; then - >&2 echo "error: "$1" does not exist." +if [[ ${#configs_src[@]} -ne 0 ]] && [[ ${#configs_src[@]} -ne ${#models_src[@]} ]]; then + >&2 echo "error: when config file is provided, # of config file should be same with modelfile" + >&2 echo "Please provide config file for each model file, or don't provide config file." exit 1 fi +delim="" +for modelpath in ${models_src[@]} +do + modelfile=$(basename "$modelpath") + + if [[ "$modelfile" != *.* ]]; then + >&2 echo "error: modelfile does not have extension." 
+ >&2 echo "Please provide extension so that $progname can identify what type of model you use." + exit 1 + fi + + if [ ! -e $modelpath ]; then + >&2 echo "error: "$modelpath" does not exist." + exit 1 + fi + + models_str="$models_str$delim\"$modelfile\"" + types_str="$types_str$delim\"${modelfile##*.}\"" + delim=", " +done + +delim="" +for configpath in ${configs_src[@]} +do + configfile=$(basename "$configpath") + + if [ ! -e $configpath ]; then + >&2 echo "error: "$configpath" does not exist." + exit 1 + fi + + configs_str="$configs_str$delim\"$configfile\"" + delim=", " +done + if [ -z "$name" ]; then - name=${modelfile%.*} + first_modelfile=$(basename "${models_src[0]}") + name=${first_modelfile%.*} fi -extension=${modelfile##*.} echo "$progname: Generating nnpackage "$name" in "$outdir"" mkdir -p "$outdir"/"$name"/metadata -if [ -s "$config_src" ]; then - config=$(basename "$config_src") - cp "$config_src" "$outdir/$name/metadata/$config" -fi - cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF { "major-version" : "1", "minor-version" : "2", "patch-version" : "0", - "configs" : [ "$config" ], - "models" : [ "$modelfile" ], - "model-types" : [ "$extension" ] + "configs" : [ $configs_str ], + "models" : [ $models_str ], + "model-types" : [ $types_str ] } EOF -cp "$1" "$outdir"/"$name" + +for modelpath in ${models_src[@]} +do + cp "$modelpath" "$outdir"/"$name" +done + +for configpath in ${configs_src[@]} +do + cp "$configpath" "$outdir/$name/metadata" +done