From ac6e4dd7b480e83b586ef533d7b29a8a97eb48fe Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Mon, 15 Nov 2021 10:54:00 +0900 Subject: [PATCH 1/1] Imported Upstream version 1.19.0 --- .ahub/tcchecker-tca/config.yaml | 2 - .gitattributes | 1 + .github/workflows/check-format.yml | 64 ++++ .github/workflows/check-pr-commit.yml | 51 +++ compiler/circle-execution-plan/CMakeLists.txt | 16 + compiler/circle-execution-plan/README.md | 29 ++ compiler/circle-execution-plan/requires.cmake | 4 + .../src/CircleExecutionPlan.cpp | 99 ++++++ .../circle-execution-plan/src/ExecutionPlanner.cpp | 389 +++++++++++++++++++++ .../circle-execution-plan/src/ExecutionPlanner.h | 130 +++++++ compiler/circle-quantizer/src/CircleQuantizer.cpp | 44 ++- .../pal/cmsisnn/KernelsToBuild.lst | 56 +++ compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h | 33 ++ .../pal/cmsisnn/PALBatchToSpaceND.h | 37 ++ compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h | 70 ++++ .../luci-interpreter/pal/cmsisnn/PALDepthToSpace.h | 35 ++ compiler/luci-interpreter/pal/cmsisnn/PALElu.h | 33 ++ .../luci-interpreter/pal/cmsisnn/PALL2Normalize.h | 34 ++ .../luci-interpreter/pal/cmsisnn/PALL2Pool2D.h | 33 ++ .../luci-interpreter/pal/cmsisnn/PALLeakyRelu.h | 32 ++ compiler/luci-interpreter/pal/cmsisnn/PALMul.h | 45 +++ compiler/luci-interpreter/pal/cmsisnn/PALNeg.h | 32 ++ .../pal/cmsisnn/PALResizeBilinear.h | 37 ++ .../pal/cmsisnn/PALResizeNearestNeighbor.h | 37 ++ compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h | 78 +++++ .../pal/cmsisnn/PALSpaceToBatchND.h | 38 ++ .../luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h | 35 ++ compiler/luci-interpreter/pal/cmsisnn/PALSub.h | 35 ++ compiler/luci-interpreter/pal/cmsisnn/pal.cmake | 62 ++++ compiler/luci-interpreter/src/kernels/Conv2D.cpp | 11 + .../luci-interpreter/src/kernels/Conv2D.test.cpp | 39 +++ compiler/luci-interpreter/src/kernels/Utils.cpp | 1 - .../luci-interpreter/src/loader/CMakeLists.txt | 2 +- .../luci-interpreter/src/loader/GraphLoader.cpp | 62 +++- 
.../luci-interpreter/src/loader/nodes/Conv2D.cpp | 15 +- compiler/luci-micro/CMakeLists.txt | 5 +- compiler/luci-micro/standalone/Toolchain.cmake | 8 - compiler/luci/CMakeLists.txt | 4 +- compiler/luci/env/CMakeLists.txt | 8 +- compiler/luci/export/CMakeLists.txt | 37 +- compiler/luci/export/src/CircleExporter.test.cpp | 137 ++++++++ compiler/luci/import/CMakeLists.txt | 11 +- .../luci/import/include/luci/Import/CircleReader.h | 71 +++- compiler/luci/import/src/CircleReader.cpp | 184 +++++++++- compiler/luci/import/src/CircleReader.test.cpp | 67 ++++ compiler/luci/import/src/Importer.cpp | 1 + compiler/luci/import/src/Importer.test.cpp | 285 ++++++++++++++- compiler/luci/lang/CMakeLists.txt | 8 +- compiler/luci/log/CMakeLists.txt | 8 +- compiler/luci/log/include/luci/Log.h | 5 +- compiler/luci/log/src/Log.cpp | 20 +- compiler/luci/logex/CMakeLists.txt | 8 +- compiler/luci/partition/CMakeLists.txt | 8 +- compiler/luci/partition/src/PartitionMerge.cpp | 3 + compiler/luci/partition/src/PartitionPGroups.cpp | 115 ++++++ compiler/luci/pass/CMakeLists.txt | 10 +- compiler/luci/pass/include/luci/CircleOptimizer.h | 5 +- .../include/luci/Pass/QuantizeWithMinMaxPass.h | 22 +- compiler/luci/pass/src/CircleOptimizer.cpp | 22 +- .../luci/pass/src/FuseActivationFunctionPass.cpp | 10 +- .../pass/src/FuseActivationFunctionPass.test.cpp | 77 ++++ compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp | 157 ++++++++- compiler/luci/plan/CMakeLists.txt | 19 +- .../luci/plan/src/CircleNodeExecutionPlan.test.cpp | 84 +++++ compiler/luci/profile/CMakeLists.txt | 8 +- compiler/luci/service/CMakeLists.txt | 8 +- compiler/one-cmds/one-build | 44 +++ compiler/one-cmds/one-optimize | 17 + compiler/one-cmds/one-quantize | 89 ++++- compiler/one-cmds/tests/OONE-BUILD_014.cfg | 2 + compiler/one-cmds/tests/one-build_014.cfg | 22 ++ compiler/one-cmds/tests/one-build_014.test | 77 ++++ compiler/one-cmds/tests/one-build_neg_007.test | 69 ++++ compiler/one-cmds/tests/one-build_neg_008.test | 41 +++ 
compiler/one-cmds/tests/one-build_neg_009.test | 41 +++ compiler/one-cmds/tests/one-quantize_007.test | 55 +++ compiler/one-cmds/tests/one-quantize_008.test | 55 +++ compiler/one-cmds/tests/one-quantize_neg_019.test | 50 +++ compiler/one-cmds/tests/onecc_023.cfg | 15 + compiler/one-cmds/tests/onecc_023.test | 42 +++ compiler/one-cmds/utils.py | 80 ++++- compiler/tflchef/core/src/ModelChef.cpp | 103 +++++- compiler/tflchef/proto/tflchef.proto | 17 + .../tflchef/tests/signature_def_index/test.recipe | 60 ++++ .../tflchef/tests/signature_def_name/test.recipe | 60 ++++ compiler/tfldump/src/Dump.cpp | 12 +- compiler/tflite2circle/driver/Driver.cpp | 5 +- compiler/tflite2circle/include/CircleModel.h | 12 +- compiler/tflite2circle/src/CircleModel.cpp | 150 +++++--- compiler/vconone/CMakeLists.txt | 2 +- docs/conf.py | 2 +- docs/release/1.19/index.rst | 13 + docs/release/1.19/release-note-1.19.0.md | 8 + .../CMSISSource-5.8.0/CMSISSourceConfig.cmake | 4 +- .../FlatBuffers-1.10/FlatBuffersConfig.cmake | 5 +- .../FlatBuffers-1.12/FlatBuffersConfig.cmake | 5 +- infra/cmake/packages/FlatBuffersConfig.cmake | 5 +- .../MbedOSSource-6.15/MbedOSSourceConfig.cmake | 4 +- infra/debian/compiler/changelog | 7 + infra/debian/runtime/changelog | 6 + .../cmake/buildtool/config/arm-none-eabi-gcc.cmake | 66 ++++ infra/nnfw/cmake/packages/ARMComputeConfig.cmake | 7 + packaging/nnfw.spec | 2 +- res/TensorFlowLiteRecipes/Conv2D_005/test.recipe | 34 ++ .../Part_Tanh_FC_nobias/test.recipe | 42 +++ .../SignatureDef_MultiOut_000/test.recipe | 82 +++++ runtime/contrib/android/api/build.gradle | 2 +- runtime/onert/api/include/nnfw_version.h | 2 +- runtime/onert/test/core/compiler/HEScheduler.cc | 16 +- 109 files changed, 4330 insertions(+), 238 deletions(-) create mode 100644 .github/workflows/check-format.yml create mode 100644 .github/workflows/check-pr-commit.yml create mode 100644 compiler/circle-execution-plan/CMakeLists.txt create mode 100644 compiler/circle-execution-plan/README.md create 
mode 100644 compiler/circle-execution-plan/requires.cmake create mode 100644 compiler/circle-execution-plan/src/CircleExecutionPlan.cpp create mode 100644 compiler/circle-execution-plan/src/ExecutionPlanner.cpp create mode 100644 compiler/circle-execution-plan/src/ExecutionPlanner.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALElu.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALMul.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALNeg.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSub.h create mode 100644 compiler/luci-interpreter/pal/cmsisnn/pal.cmake delete mode 100644 compiler/luci-micro/standalone/Toolchain.cmake create mode 100644 compiler/luci/export/src/CircleExporter.test.cpp create mode 100644 compiler/luci/import/src/CircleReader.test.cpp create mode 100644 compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp create mode 100644 compiler/one-cmds/tests/OONE-BUILD_014.cfg create mode 100644 compiler/one-cmds/tests/one-build_014.cfg create mode 100644 
compiler/one-cmds/tests/one-build_014.test create mode 100644 compiler/one-cmds/tests/one-build_neg_007.test create mode 100644 compiler/one-cmds/tests/one-build_neg_008.test create mode 100644 compiler/one-cmds/tests/one-build_neg_009.test create mode 100644 compiler/one-cmds/tests/one-quantize_007.test create mode 100644 compiler/one-cmds/tests/one-quantize_008.test create mode 100644 compiler/one-cmds/tests/one-quantize_neg_019.test create mode 100644 compiler/one-cmds/tests/onecc_023.cfg create mode 100644 compiler/one-cmds/tests/onecc_023.test create mode 100644 compiler/tflchef/tests/signature_def_index/test.recipe create mode 100644 compiler/tflchef/tests/signature_def_name/test.recipe create mode 100644 docs/release/1.19/index.rst create mode 100644 docs/release/1.19/release-note-1.19.0.md create mode 100644 infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake create mode 100644 res/TensorFlowLiteRecipes/Conv2D_005/test.recipe create mode 100644 res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe create mode 100644 res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml index 86d272d..40635d4 100644 --- a/.ahub/tcchecker-tca/config.yaml +++ b/.ahub/tcchecker-tca/config.yaml @@ -16,9 +16,7 @@ test: - /runtime/onert/test/graph/verifier - /runtime/onert/test/ir - /runtime/onert/test/util - - /tests/nnapi/src - /tests/nnfw_api/src - - /tests/tools/tflite_run/src testFile: - extension: cpp diff --git a/.gitattributes b/.gitattributes index b8eec3d..d369854 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ tests/nnapi/specs/* linguist-detectable=false +res/* linguist-detectable=false diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml new file mode 100644 index 0000000..bcbc3c5 --- /dev/null +++ b/.github/workflows/check-format.yml @@ -0,0 +1,64 @@ +name: Check code format + +on: + push: + branches: + - master + - 
release/* + pull_request: + branches: + - master + - release/* + +defaults: + run: + shell: bash + +jobs: + check-format: + name: Check format + runs-on: ubuntu-20.04 + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + # C format: clang-format-8 + # Python format: yapf==0.22.0 + - name: Install packages + run: | + sudo apt-get install -y clang-format-8 + python -m pip install --upgrade pip + pip install yapf==0.22.0 + + - name: Check + run: ./nnas format + + # Upload patch file if failed + - name: Store archive + uses: actions/upload-artifact@v2 + if: failure() + with: + name: format-patch + path: format.patch + retention-days: 3 + + check-copyright: + name: Check copyright + runs-on: ubuntu-20.04 + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + # Fetch all history and branch (default: 1) + # Require all history to get file creation date + fetch-depth: 0 + + - name: Check copyright + run: ./nnfw copyright-check diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml new file mode 100644 index 0000000..38c76dc --- /dev/null +++ b/.github/workflows/check-pr-commit.yml @@ -0,0 +1,51 @@ +name: Check PR commit + +on: + pull_request: + branches: + - master + - release/* + +defaults: + run: + shell: bash + +jobs: + check-commit-message: + name: Check commit message + runs-on: ubuntu-20.04 + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + # Checkout PR head commit + # Checkout Action use merge commit as default + ref: ${{ github.event.pull_request.head.sha }} + # Fetch all history and branch (default: 1) + fetch-depth: 0 + + - name: Get commit body + run: | + git log origin/${GITHUB_BASE_REF}..HEAD --format=%b > commit_msg.txt + sed '/^$/d' commit_msg.txt > commit_body.txt + + - name: Check signed-off + run: | + # Check string starting from "Signed-off-by:" + count=$(cat commit_body.txt | grep 
'Signed-off-by:' | wc -l) + if [[ ! "$count" -ge "1" ]]; then + exit 1 + fi + + echo "Signed-off-by is OK" + + - name: Check body words + # Run if check_signed_off step is failed + if: ${{ always() }} + run: | + count=$(cat commit_body.txt | sed '/Signed-off-by:/d' | wc -w) + echo "Commit body word check: $count words" + if [[ "$count" -lt "5" ]]; then + exit 1 + fi diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt new file mode 100644 index 0000000..115d248 --- /dev/null +++ b/compiler/circle-execution-plan/CMakeLists.txt @@ -0,0 +1,16 @@ +set(SOURCES + src/CircleExecutionPlan.cpp + src/ExecutionPlanner.cpp + src/ExecutionPlanner.h + ) + +add_executable(circle_execution_plan "${SOURCES}") +target_link_libraries(circle_execution_plan foder) +target_link_libraries(circle_execution_plan safemain) +target_link_libraries(circle_execution_plan luci_env) +target_link_libraries(circle_execution_plan luci_import) +target_link_libraries(circle_execution_plan luci_export) +target_link_libraries(circle_execution_plan luci_plan) +target_link_libraries(circle_execution_plan arser) + +install(TARGETS circle_execution_plan DESTINATION bin) diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md new file mode 100644 index 0000000..e789a55 --- /dev/null +++ b/compiler/circle-execution-plan/README.md @@ -0,0 +1,29 @@ +# circle-execution-plan + +_circle-execution-plan_ tool provides model with "execution plan". + +This tool takes circle file as input and returns modified circle file. +The output circle file contains plan (`CircleNodeMemoryPlan`) information for every node. + + +"execution plan" contains: +- number which determines order in which nodes will be executed +- memory offsets for node output tensors from the beginning of shared memory buffer + +In order to record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`. 
+For this purpose we use `std::map> _memory_plan_table` which for each node with key ID contains encoded `CircleNodeMemoryPlan` data. + +### Execution plan building + +In order to build "execution plan" we use `ExecutionPlanner` class. +The main method is `get_execution_plan()` which for each node finds and writes to its annotations +"execution plan". For this purpose there are two steps: +- determining the order of execution of nodes, which is stored in `_ordered_nodes` vector. +Now for this purpose there is only one default method `get_default_execution_order_plan()` that uses `loco::postorder_traversal(const std::vector &roots)`. + In the future we can add new method and find the most suitable way to graph traversal. + +- determining memory offsets for nodes from the beginning of shared memory buffer, which is stored in `_offsets`. +Now for this purpose there is one method `get_offsets_with_greedy_by_size()` that is the implementation of the "Greedy by Size" algorithm, which is described in https://arxiv.org/pdf/2001.03288.pdf article. + The main objective is to minimize the size of the allocated memory block. + In the future, other methods may also appear here to determine memory offsets for nodes + in the best way. diff --git a/compiler/circle-execution-plan/requires.cmake b/compiler/circle-execution-plan/requires.cmake new file mode 100644 index 0000000..76858f4 --- /dev/null +++ b/compiler/circle-execution-plan/requires.cmake @@ -0,0 +1,4 @@ +require(foder) +require(safemain) +require(luci) +require(arser) diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp new file mode 100644 index 0000000..a54100b --- /dev/null +++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include "ExecutionPlanner.h" + +#include + +#include +#include +#include +#include +#include + +int entry(int argc, char **argv) +{ + arser::Arser arser("circle_execution_plan provides model with execution plan meta information"); + + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); + arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + std::cout << arser; + return 255; + } + + std::string input_path = arser.get("input"); + std::string output_path = arser.get("output"); + + foder::FileLoader file_loader{input_path}; + std::vector model_data; + + try + { + model_data = file_loader.load(); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + return EXIT_FAILURE; + } + + flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + return EXIT_FAILURE; + } + + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << 
std::endl; + return EXIT_FAILURE; + } + + // Import from input Circle file + luci::Importer importer; + auto module = importer.importModule(circle_model); + + // Do main job + luci::ExecutionPlanner execution_planner(module->graph()); + execution_planner.get_execution_plan(); + + // Export to output Circle file + luci::CircleExporter exporter; + luci::CircleFileExpContract contract(module.get(), output_path); + + if (!exporter.invoke(&contract)) + { + std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl; + return 255; + } + + return 0; +} diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp new file mode 100644 index 0000000..c37d1e5 --- /dev/null +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ExecutionPlanner.h" +#include +#include + +namespace luci +{ +namespace +{ + +constexpr uint32_t nodeNotAssigned = std::numeric_limits::max(); + +uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size, + uint32_t stride, uint32_t dilation_rate = 1) +{ + const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding) + { + case Padding::SAME: + return (image_size + stride - 1) / stride; + case Padding::VALID: + return (image_size + stride - effective_filter_size) / stride; + default: + assert(false); + } +} + +// Method finds (if necessary) size for im2col temporary tensor. +uint32_t compute_im2col_size(const luci::CircleConv2D *conv) +{ + auto conv_input = loco::must_cast(conv->input()); + auto filter = loco::must_cast(conv->filter()); + auto padding = (conv->padding()); + uint32_t stride_height = conv->stride()->h(); + uint32_t stride_width = conv->stride()->w(); + + uint32_t dilation_height_factor = conv->dilation()->h(); + uint32_t dilation_width_factor = conv->dilation()->w(); + + uint32_t filter_height = filter->dim(1).value(); + uint32_t filter_width = filter->dim(2).value(); + + const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1; + const bool need_non_dilated_im2col = + stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1; + bool need_im2col = + conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col); + + if (!need_im2col) + { + return 0; + } + + uint32_t input_depth = conv_input->dim(3).value(); + uint32_t input_height = conv_input->dim(1).value(); + uint32_t input_width = conv_input->dim(2).value(); + + uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height, + dilation_height_factor); + uint32_t output_width = + compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor); + + uint32_t batches = 
conv_input->dim(0).value(); + + return batches * output_height * output_width * input_depth * filter_height * filter_width * + size(conv_input->dtype()); +} + +} // namespace + +void ExecutionPlanner::get_execution_plan() +{ + get_default_execution_order_plan(); + _required_size = get_offsets_with_greedy_by_size(); + for (uint32_t i = 0; i < _ordered_nodes.size(); i++) + { + luci::CircleNodeExecutionPlan execution_plan(i, _offsets[i]); + luci::add_execution_plan(loco::must_cast(_ordered_nodes[i]), + execution_plan); + } + auto settings = luci::UserSettings::settings(); + settings->set(luci::UserSettings::Key::ExecutionPlanGen, true); +} + +void ExecutionPlanner::get_default_execution_order_plan() +{ + // Get execution order in _ordered_nodes + _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast(_graph))); +} + +void ExecutionPlanner::get_usage_interval() +{ + // Initialize vectors of first and last nodes for usage interval + _alloc_node.assign(_ordered_nodes.size(), nodeNotAssigned); + _dealloc_node.assign(_ordered_nodes.size(), nodeNotAssigned); + + // Vector for count usages + std::vector usages_counts(_ordered_nodes.size(), 0); + + auto allocate = [this](uint32_t node, uint32_t tensor) { + if (_alloc_node[tensor] != nodeNotAssigned) + { + return; + } + assert(_dealloc_node[tensor] == nodeNotAssigned); + _alloc_node[tensor] = node; + }; + + auto deallocate = [this](uint32_t node, uint32_t tensor) { + assert(_dealloc_node[tensor] == nodeNotAssigned); + _dealloc_node[tensor] = node; + }; + + // Increase refcounts for graph outputs and inputs nodes + for (auto &output_node : output_nodes(_graph)) + { + auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), output_node); + size_t index = std::distance(_ordered_nodes.begin(), it); + usages_counts[index]++; + } + + for (auto &input_node : input_nodes(_graph)) + { + auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), input_node); + size_t index = 
std::distance(_ordered_nodes.begin(), it); + usages_counts[index]++; + allocate(0, index); + } + + // Increase refcounts of usage for all nodes in _ordered_nodes vector + for (uint32_t i = 0; i < _ordered_nodes.size(); i++) + { + const auto node = _ordered_nodes.at(i); + auto prev_nodes = preds(node); + for (auto &prev_node : prev_nodes) + { + auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node); + size_t index = std::distance(_ordered_nodes.begin(), it); + usages_counts[index]++; + } + } + + for (uint32_t i = 0; i < _ordered_nodes.size(); i++) + { + const auto node = _ordered_nodes.at(i); + if (const auto *const_node = dynamic_cast(node)) + { + allocate(0, i); + } + allocate(i, i); + + auto prev_nodes = preds(node); + for (auto &prev_node : prev_nodes) + { + auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node); + size_t index = std::distance(_ordered_nodes.begin(), it); + usages_counts[index]--; + if (usages_counts[index] == 0) + { + deallocate(i, index); + } + } + } +} + +uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size() +{ + get_usage_interval(); + auto required_size = greedy_by_size_approach(); + + _offsets.resize(_ordered_nodes.size()); + for (const auto &alloc : _alloc_node_inform_vector) + { + // Fill offsets vector: first go offset for current node and then should go offsets for + // temporaries tensors + if (alloc.is_temp) + { + _offsets[alloc.node_num].push_back(alloc.offset); + } + else + { + _offsets[alloc.node_num].insert(_offsets[alloc.node_num].begin(), alloc.offset); + } + } + return required_size; +} + +uint32_t ExecutionPlanner::greedy_by_size_approach() +{ + size_t result_size = 0; + create_alloc_node_inform_vector(false, false, false); + std::vector ordered_alloc_inform; + for (auto ¤t_node : _alloc_node_inform_vector) + { + if (current_node.size == 0) + { + current_node.offset = 0; + continue; + } + const uint32_t offsetNotAssigned = std::numeric_limits::max(); + size_t best_offset = 
offsetNotAssigned; + uint32_t best_offset_fit = offsetNotAssigned; + + uint32_t current_offset = 0; + + for (const auto &alloc_inform : ordered_alloc_inform) + { + if ((alloc_inform.last_node < current_node.first_node || + alloc_inform.first_node > current_node.last_node)) + { + continue; + } + + if (current_offset + current_node.size <= alloc_inform.offset && + alloc_inform.offset - current_offset < best_offset_fit) + { + best_offset = current_offset; + best_offset_fit = alloc_inform.offset - current_offset; + } + current_offset = std::max(current_offset, alloc_inform.offset + alloc_inform.size); + } + if (best_offset == offsetNotAssigned) + { + best_offset = current_offset; + } + + result_size = std::max(result_size, best_offset + current_node.size); + current_node.offset = best_offset; + + auto insertion_it = + std::upper_bound(ordered_alloc_inform.begin(), ordered_alloc_inform.end(), current_node); + ordered_alloc_inform.insert(insertion_it, current_node); + } + return result_size; +} + +void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs, + bool null_im2col) +{ + auto node_compare = [this](const AllocationNodeInformation &alloc_1, + const AllocationNodeInformation &alloc_2) { + auto idx1 = alloc_1.node_num; + auto idx2 = alloc_2.node_num; + + if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned) + { + if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned) + { + return idx1 < idx2; + } + return true; + } + if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned) + { + return false; + } + + auto size_1 = alloc_1.size; + auto size_2 = alloc_2.size; + + if (size_1 != size_2) + { + return size_1 > size_2; + } + return this->_alloc_node[idx1] < this->_alloc_node[idx2]; + }; + + _alloc_node_inform_vector.resize(_ordered_nodes.size()); + + for (size_t i = 0; i < _ordered_nodes.size(); i++) + { + auto circle_node = loco::must_cast(_ordered_nodes[i]); + 
uint32_t node_size = 1; + for (uint32_t axis = 0; axis < circle_node->rank(); ++axis) + { + node_size *= circle_node->dim(axis).value(); + } + node_size *= size(circle_node->dtype()); + + _alloc_node_inform_vector[i].node_num = i; + _alloc_node_inform_vector[i].first_node = _alloc_node[i]; + _alloc_node_inform_vector[i].last_node = _dealloc_node[i]; + + const auto *const_node = dynamic_cast(circle_node); + if (i == 0 && null_inputs) + { + _alloc_node_inform_vector[i].size = 0; + } + else if (const_node && null_consts) + { + _alloc_node_inform_vector[i].size = 0; + } + else + { + _alloc_node_inform_vector[i].size = node_size; + } + + // Im2col: a CONV_2D node with a non-zero im2col size gets an extra temporary allocation entry + auto opcode = circle_node->opcode(); + if (opcode == luci::CircleOpcode::CONV_2D) + { + auto conv = loco::must_cast(circle_node); + auto im2col_size = compute_im2col_size(conv); + if (im2col_size > 0) + { + AllocationNodeInformation temp_alloc; + + if (null_im2col) + { + temp_alloc.size = 0; + } + else + { + temp_alloc.size = im2col_size; + } + + temp_alloc.first_node = i - 1; + temp_alloc.last_node = i + 1; + temp_alloc.node_num = i; + temp_alloc.is_temp = true; + + _alloc_node_inform_vector.push_back(temp_alloc); + _alloc_node.push_back(i); + _dealloc_node.push_back(i); + } + } + } + // Sort _alloc_node_inform_vector with node_compare for the greedy by size approach. 
+ std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), node_compare); +} + +void ExecutionPlanner::dump_inform() +{ + uint32_t max_breadth = 0; + + for (uint32_t i = 0; i < _ordered_nodes.size(); i++) + { + auto current_node_it = std::find_if( + _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), + [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; }); + for (uint32_t j = 0; j < _ordered_nodes.size(); j++) + { + auto first_node = _alloc_node[j]; + auto last_node = _dealloc_node[j]; + + auto it = std::find_if( + _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), + [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; }); + if (i >= first_node && i <= last_node) + { + current_node_it->breadth += it->size; + } + } + if (max_breadth < current_node_it->breadth) + { + max_breadth = current_node_it->breadth; + } + + auto node = loco::must_cast(_ordered_nodes.at(i)); + printf("node_num = %d node_name = %s node_size = %d node_offset = %d node_breadth = " + "%u node_first_node = %d node_last_node = %d\n", + i, node->name().c_str(), current_node_it->size, current_node_it->offset, + current_node_it->breadth, current_node_it->first_node, current_node_it->last_node); + } + printf("Lower bound is = %u\n", max_breadth); + std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), + [](const AllocationNodeInformation &first, const AllocationNodeInformation &second) { + if (first.breadth != second.breadth) + return first.breadth > second.breadth; + return first.node_num < second.node_num; + }); +} + +} // namespace luci diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h new file mode 100644 index 0000000..8e3d9b4 --- /dev/null +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CIRCLE_EXECUTION_PLANNER_H +#define CIRCLE_EXECUTION_PLANNER_H + +#include +#include + +namespace luci +{ +// Struct with additional information for a node. It helps build the allocation plan for nodes. +struct AllocationNodeInformation +{ + + AllocationNodeInformation() + { + offset = 0; + size = 0; + node_num = -1; + first_node = -1; + last_node = -1; + is_temp = false; + breadth = 0; + } + // memory offset from the beginning of the buffer + uint32_t offset; + // required size of the node + uint32_t size; + // index of the node in _ordered_nodes (a temporary allocation uses the index of its owner) + uint32_t node_num; + // the node_num of the node at which the current node is first used. + // Used to build the usage interval of the current node + uint32_t first_node; + // the node_num of the node at which the current node is last used. + // Used to build the usage interval of the current node + uint32_t last_node; + // is the current node temporary or not + bool is_temp; + // operation breadth of current node + uint32_t breadth; + + bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; } +}; + +class ExecutionPlanner +{ +public: + ExecutionPlanner() = delete; + explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; }; + + // Method provides execution plan, which contains execution order and + // memory offsets for all nodes in _graph. 
+ // The plan is written into node annotations with the help of CircleNodeExecutionPlan class. + void get_execution_plan(); + +private: + // Method gets default execution order plan and saves it in _ordered_nodes vector. + // There can be different variants of execution order and this method provides main one. + void get_default_execution_order_plan(); + + // Method provides nodes with usage interval information. + void get_usage_interval(); + + // Method dumps execution plan information. + void dump_inform(); + + // Method finds required offsets for all nodes from _ordered_nodes, using greedy by size approach. + // It saves offsets in _offsets vector. + // Return: required size of buffer. + uint32_t get_offsets_with_greedy_by_size(); + + // Implementation of the greedy by size approach to find offsets for nodes. + uint32_t greedy_by_size_approach(); + + // Method creates and fills _alloc_node_inform_vector with usage intervals and node sizes. + // null_consts = true - size of const nodes will be equal to 0; + // null_inputs = true - size of input nodes will be equal to 0; + // null_im2col = true - size of im2col nodes will be equal to 0; + // It is used if we don't want to take input (const or im2col) nodes into account + // when determining offsets and calculating the required buffer size. This is used for + // experiments. + void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false, + bool null_im2col = false); + + // Stores additional allocation information for all nodes from _graph. + std::vector _alloc_node_inform_vector; + + // Stores nodes in execution order. + std::vector _ordered_nodes; + + // Stores nodes memory offsets in arena buffer. + std::vector> _offsets; + + // Stores positions of nodes in _ordered_nodes vector, + // where the node in the i'th position of this vector is first used. 
+ // For example, if i'th position of _alloc_node stores j value, then + // the node from _ordered_nodes in j'th position is the node when we should allocate (first use) + // the node from _ordered_nodes in i'th position. + std::vector _alloc_node; + + // Stores positions of nodes in _ordered_nodes vector, + // where the node in the i'th position of this vector is last used. + // For example, if i'th position of _dealloc_node stores j value, then + // the node from _ordered_nodes in j'th position is the node when we can deallocate (last use) + // the node from _ordered_nodes in i'th position. + std::vector _dealloc_node; + + loco::Graph *_graph; + + // Required memory size. + uint32_t _required_size = 0; +}; + +} // namespace luci + +#endif // CIRCLE_EXECUTION_PLANNER_H diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index 1a09a8a..57ac30a 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -88,24 +88,24 @@ int entry(int argc, char **argv) .type(arser::DataType::STR_VEC) .required(false) .help("Quantize-dequantize weight values required action before quantization. " - "Three arguments required: input_dtype(float32) " - "output_dtype(uint8) granularity(layer, channel)"); + "Three arguments required: input_model_dtype(float32) " + "output_model_dtype(uint8) granularity(layer, channel)"); arser.add_argument(qwmm) .nargs(3) .type(arser::DataType::STR_VEC) .required(false) .help("Quantize with min/max values. " - "Three arguments required: input_dtype(float32) " - "output_dtype(uint8) granularity(layer, channel)"); + "Three arguments required: input_model_dtype(float32) " + "output_model_dtype(uint8) granularity(layer, channel)"); arser.add_argument(rq) .nargs(2) .type(arser::DataType::STR_VEC) .required(false) .help("Requantize a quantized model. 
" - "Two arguments required: input_dtype(int8) " - "output_dtype(uint8)"); + "Two arguments required: input_model_dtype(int8) " + "output_model_dtype(uint8)"); arser.add_argument(fq) .nargs(3) @@ -116,6 +116,18 @@ int entry(int argc, char **argv) "Three arguments required: tensor_name(string), " "scale(float) zero_point(int)"); + arser.add_argument("--input_type") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .help("Input type of quantized model (uint8 or int16)"); + + arser.add_argument("--output_type") + .nargs(1) + .type(arser::DataType::STR) + .required(false) + .help("Output type of quantized model (uint8 or int16)"); + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); @@ -163,8 +175,8 @@ int entry(int argc, char **argv) } options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0)); - options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1)); + options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0)); + options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1)); options->param(AlgorithmParameters::Quantize_granularity, values.at(2)); } @@ -178,9 +190,17 @@ int entry(int argc, char **argv) } options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0)); - options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1)); + options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0)); + options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1)); options->param(AlgorithmParameters::Quantize_granularity, values.at(2)); + + if (arser["--input_type"]) + options->param(AlgorithmParameters::Quantize_input_type, + arser.get("--input_type")); + + if (arser["--output_type"]) + 
options->param(AlgorithmParameters::Quantize_output_type, + arser.get("--output_type")); } if (arser[rq]) @@ -193,8 +213,8 @@ int entry(int argc, char **argv) } options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0)); - options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1)); + options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0)); + options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1)); } if (arser[fq]) diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst new file mode 100644 index 0000000..771974a --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -0,0 +1,56 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Softmax) 
+REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(While) diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h new file mode 100644 index 0000000..21e6329 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h new file mode 100644 index 0000000..4dd77ff --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h new file mode 100644 index 0000000..0a8ae4e --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &im2col_shape, + float *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &im2col_shape, + uint8 *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + im2col_data, nullptr); +} + +static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &im2col_shape, + int8 *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, 
bias_data, + output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h new file mode 100644 index 0000000..8463e57 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h new file mode 100644 index 0000000..4089d0a --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include + +namespace luci_interpreter_pal +{ + +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h new file mode 100644 index 0000000..f84742a --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h new file mode 100644 index 0000000..38a302f --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h new file mode 100644 index 0000000..9ccd222 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h new file mode 100644 index 0000000..2b46b10 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include + +namespace luci_interpreter_pal +{ +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const float *input1_data, const tflite::RuntimeShape &input2_shape, + const float *input2_data, const tflite::RuntimeShape &output_shape, + float *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +static inline void BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, + const float *input1_data, + const tflite::RuntimeShape &input2_shape, + const float *input2_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h new file mode 100644 index 0000000..be5903a --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h new file mode 100644 index 0000000..cc9f0fd --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h new file mode 100644 index 0000000..f4d5a6e --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h new file mode 100644 index 0000000..6bbda48 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include +#include + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template +static inline void Softmax(const tflite::SoftmaxParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation is not supported on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} + +template <> +inline void Softmax(const tflite::SoftmaxParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int32_t mult = params.input_multiplier; + const int32_t shift = params.input_left_shift; + const int32_t diff_min = params.diff_min; + + arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data); +} +} // namespace
luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h new file mode 100644 index 0000000..fdddaa9 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams &params, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h new file mode 100644 index
0000000..816b7f6 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h new file mode 100644 index 0000000..ea57578 --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include + +namespace luci_interpreter_pal +{ +template +static inline void Sub(const tflite::ArithmeticParams &params, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake new file mode 100644 index 0000000..9a25a3c --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake @@ -0,0 +1,62 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + nnas_find_package(CMSISSource EXACT 5.8.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) +
message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + + if (NOT CMSISSource_FOUND) + message(STATUS "Skipping luci-interpreter: CMSISSource not found") + return() + endif () + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE "${PAL}") + target_include_directories(${TGT} PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES}) + set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN) + target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE + "${CMSISSource_DIR}/CMSIS/NN/Include" + "${CMSISSource_DIR}/CMSIS/DSP/Include" + "${CMSISSource_DIR}/CMSIS/Core/Include") + + target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal) +endmacro() diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index fb5e063..5647f4c 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -129,6 +129,17 @@ void Conv2D::configure() auto im2col = getOutputTensors()[1]; im2col->set_allocatable(false); } + + switch (_params.activation) + { + case Activation::NONE: + case Activation::RELU: + case Activation::RELU6: + case Activation::RELU_N1_TO_1: + break; + default: + throw std::runtime_error("Unsupported fused activation"); + } } void 
Conv2D::execute() const diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 277c280..0fe6ef7 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -663,6 +663,45 @@ TEST_F(Conv2DTest, Invalid_Input_Shape_NEG) EXPECT_ANY_THROW(kernel.configure()); } +TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::TANH; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp index 6e83e37..586cfa1 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Utils.cpp @@ -32,7 +32,6 @@ void calculateActivationRange(Activation activation, float *activation_min, floa switch (activation) { case Activation::NONE: - case Activation::TANH: *activation_min = std::numeric_limits::lowest(); *activation_max = std::numeric_limits::max(); break; diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt index 974283a..2cde99f 100644 --- a/compiler/luci-interpreter/src/loader/CMakeLists.txt +++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt @@ -23,7 +23,7 @@ target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER target_link_libraries(${LUCI_INTERPRETER_LOADER} PUBLIC luci_lang ${LUCI_INTERPRETER_CORE} - PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common) + PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan) if(NOT ENABLE_TEST) return() diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index b55e7c5..a14442e 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -18,6 +18,7 @@ #include "loader/KernelBuilder.h" +#include #include namespace luci_interpreter @@ -155,6 +156,15 @@ void GraphLoader::loadTensors() auto tensor = std::make_unique(node->dtype(), std::move(shape), std::move(quantization), node->name()); + // If node has execution plan then read memory offsets for nodes + // from the beginning of shared memory buffer. Used in Static Memory Manager. 
+ if (luci::has_execution_plan(node)) + { + auto execution_plan = luci::get_execution_plan(node); + assert(!execution_plan.offsets().empty()); + tensor->set_offset(execution_plan.offsets().front()); + } + if (const auto *const_node = dynamic_cast(node)) { size_t data_size{}; @@ -199,16 +209,54 @@ void GraphLoader::loadOperators() KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor); // Create kernels for executable nodes. This has to be done in execution order. - for (const loco::Node *loco_node : - loco::postorder_traversal(loco::output_nodes(const_cast(_graph)))) + auto graph = const_cast(_graph); + + auto const graph_nodes = loco::all_nodes(graph); + + // Checking for execution plan in node annotations. + bool has_execution_annotation = true; + auto const checking_exec_plan = [&has_execution_annotation](auto const node) { + const auto *circle_node = loco::must_cast(node); + if (!luci::has_execution_plan(circle_node)) + has_execution_annotation = false; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan); + + if (has_execution_annotation) { - const auto *node = loco::must_cast(loco_node); + // Build ordered_nodes vector that stores the order of execution of graph nodes. 
+ std::vector ordered_nodes(graph_nodes.size()); - if (isExecutableNode(node)) + auto const filler = [&ordered_nodes](auto const node) { + const auto *circle_node = loco::must_cast(node); + auto const position = luci::get_execution_plan(circle_node).order_in_plan(); + ordered_nodes.at(position) = circle_node; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), filler); + + for (auto node : ordered_nodes) { - std::unique_ptr kernel = kernel_builder.build(node); - _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); - _runtime_graph->addKernel(std::move(kernel)); + if (isExecutableNode(node)) + { + std::unique_ptr kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } + else + { + // If it is impossible to build the execution order plan, + // then we use the default postorder_traversal approach. + for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph))) + { + const auto *node = loco::must_cast(loco_node); + if (isExecutableNode(node)) + { + std::unique_ptr kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } } } } diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp index 71c8ef3..22fd1ac 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -17,6 +17,7 @@ #include "Builders.h" #include "kernels/Conv2D.h" +#include namespace luci_interpreter { @@ -31,13 +32,25 @@ std::unique_ptr build_kernel_CircleConv2D(const luci::CircleNode *circle const Tensor *input = helper.getInputTensor(node->input()); const Tensor *filter = helper.getInputTensor(node->filter()); - const Tensor *bias = helper.getInputTensor(node->bias()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); 
Tensor *output = helper.getOutputTensor(node); auto im2col = std::make_unique(input->element_type(), Shape({}), AffineQuantization{}, ""); im2col->set_observable(false); im2col->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for im2col temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in im2col. + im2col->set_offset(execution_plan.offsets().at(1)); + } Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col)); Conv2DParams params{}; diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt index d936e12..9434708 100644 --- a/compiler/luci-micro/CMakeLists.txt +++ b/compiler/luci-micro/CMakeLists.txt @@ -13,11 +13,12 @@ endif() set(CMAKE_ARM_OPTIONS -DLUCI_INTERPRETER_STATIC=ON -DLUCI_STATIC=ON - "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake" + -DBUILD_CMSIS_NN_FUNCTIONS=ON + -DTARGET_CPU=cortex-m7 + "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake" "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu" "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}" "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}" - -DCPU_ARCH=arm -DC_COMPILER=${ARM_C_COMPILER} -DCXX_COMPILER=${ARM_CXX_COMPILER} -DASM_COMPILER=${ARM_ASM_COMPILER} diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake deleted file mode 100644 index 2d23b5d..0000000 --- a/compiler/luci-micro/standalone/Toolchain.cmake +++ /dev/null @@ -1,8 +0,0 @@ -set(CMAKE_SYSTEM_NAME Generic) 
- -set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}") -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -set(CMAKE_C_COMPILER "${C_COMPILER}") -set(CMAKE_CXX_COMPILER "${CXX_COMPILER}") -set(CMAKE_ASM_COMPILER "${ASM_COMPILER}") -set(CMAKE_OBJCOPY "${OBJCOPY}") diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt index 9dcf1b5..b92eefb 100644 --- a/compiler/luci/CMakeLists.txt +++ b/compiler/luci/CMakeLists.txt @@ -4,9 +4,9 @@ # # Currently this feature is used for luci-interpreter MCU builds. if (STATIC_LUCI) - set(LIBRARY_TYPE "STATIC") + set(LUCI_LIBRARY_TYPE "STATIC") else() - set(LIBRARY_TYPE "SHARED") + set(LUCI_LIBRARY_TYPE "SHARED") endif() add_subdirectory(env) diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt index bba5155..7025db2 100644 --- a/compiler/luci/env/CMakeLists.txt +++ b/compiler/luci/env/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_env ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_env ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_env PUBLIC include) target_link_libraries(luci_env PRIVATE nncc_common) install(TARGETS luci_env DESTINATION lib) diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt index 2b41a62..a267d0e 100644 --- a/compiler/luci/export/CMakeLists.txt +++ b/compiler/luci/export/CMakeLists.txt @@ -1,13 +1,12 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") -# TODO enable tests -#file(GLOB_RECURSE TESTS "src/*.test.cpp") -#list(REMOVE_ITEM SOURCES ${TESTS}) +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + 
set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_export ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_export ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_export PRIVATE src) target_include_directories(luci_export PUBLIC include) target_link_libraries(luci_export PRIVATE luci_lang) @@ -26,13 +25,17 @@ install(TARGETS luci_export DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") -#if(NOT ENABLE_TEST) -# return() -#endif(NOT ENABLE_TEST) -# -#nnas_find_package(GTest REQUIRED) -# -#GTest_AddTest(luci_export_test ${TESTS}) -#target_include_directories(luci_export_test PRIVATE src) -#target_link_libraries(luci_export_test luci_export) -#target_link_libraries(luci_export_test oops) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(luci_export_test ${TESTS}) +target_include_directories(luci_export_test PRIVATE src) +target_link_libraries(luci_export_test luci_export) +target_link_libraries(luci_export_test luci_plan) +target_link_libraries(luci_export_test luci_lang) +target_link_libraries(luci_export_test mio_circle) +target_link_libraries(luci_export_test luci_env) +target_link_libraries(luci_export_test oops) diff --git a/compiler/luci/export/src/CircleExporter.test.cpp b/compiler/luci/export/src/CircleExporter.test.cpp new file mode 100644 index 0000000..5898f9d --- /dev/null +++ b/compiler/luci/export/src/CircleExporter.test.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/CircleExporter.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include + +class SampleGraphContract : public luci::CircleExporter::Contract +{ +public: + SampleGraphContract() : luci::CircleExporter::Contract(), _buffer(new std::vector) + { + // create needed entities + _g = loco::make_graph(); + auto graph_input = _g->inputs()->create(); + auto graph_output = _g->outputs()->create(); + input_node = _g->nodes()->create(); + output_node = _g->nodes()->create(); + relu_node = _g->nodes()->create(); + + // link nodes and link them to graph + relu_node->features(input_node); + output_node->from(relu_node); + input_node->index(graph_input->index()); + output_node->index(graph_output->index()); + + // Set needed properties + input_node->name("input"); + output_node->name("output"); + relu_node->name("relu"); + input_node->dtype(loco::DataType::FLOAT32); + + graph_input->shape({1, 2, 3, 4}); + graph_input->dtype(loco::DataType::FLOAT32); + + graph_output->shape({1, 2, 3, 4}); + graph_output->dtype(loco::DataType::FLOAT32); + } + + loco::Graph *graph(void) const override { return _g.get(); } + +public: + bool store(const char *ptr, const size_t size) const override + { + _buffer->resize(size); + std::copy(ptr, ptr + size, _buffer->begin()); + return true; + } + + const std::vector &get_buffer() { return *_buffer; } + +public: + luci::CircleInput *input_node; + luci::CircleOutput *output_node; + luci::CircleRelu *relu_node; + +private: + std::unique_ptr _g; + std::unique_ptr> _buffer; +}; + 
+TEST(CircleExport, export_execution_plan) +{ + SampleGraphContract contract; + uint32_t reference_order = 1; + uint32_t reference_offset = 100u; + luci::add_execution_plan(contract.relu_node, + luci::CircleNodeExecutionPlan(reference_order, {reference_offset})); + + luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, true); + luci::CircleExporter exporter; + + exporter.invoke(&contract); + + ASSERT_FALSE(contract.get_buffer().empty()); + std::unique_ptr model(circle::GetModel(contract.get_buffer().data())->UnPack()); + ASSERT_NE(model.get(), nullptr); + ASSERT_EQ(model->metadata[0]->name, "ONE_execution_plan_table"); + auto metadata_buffer = model->metadata[0]->buffer; + auto &buffer = model->buffers[metadata_buffer]->data; + ASSERT_EQ(buffer.size(), 20); + uint32_t *raw_table_contents = reinterpret_cast(buffer.data()); + + auto num_entries = raw_table_contents[0]; + ASSERT_EQ(num_entries, 1); + auto node_id = raw_table_contents[1]; + ASSERT_EQ(node_id, 1); // relu node is second (aka id 1) in topological sort in exporter + auto node_plan_size = raw_table_contents[2]; + ASSERT_EQ(node_plan_size, 2); // 1 for execution order, 1 for memory offset value + auto node_plan_order = raw_table_contents[3]; + ASSERT_EQ(node_plan_order, + reference_order); // this value goes from CircleNodeExecutionPlan initialization + auto node_plan_offset = raw_table_contents[4]; + ASSERT_EQ(node_plan_offset, + reference_offset); // this value goes from CircleNodeExecutionPlan initialization +} + +TEST(CircleExport, export_execution_plan_nosetting_NEG) +{ + SampleGraphContract contract; + uint32_t reference_order = 1; + uint32_t reference_offset = 100u; + luci::add_execution_plan(contract.relu_node, + luci::CircleNodeExecutionPlan(reference_order, {reference_offset})); + + luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, false); + luci::CircleExporter exporter; + + exporter.invoke(&contract); + + ASSERT_FALSE(contract.get_buffer().empty()); +
std::unique_ptr model(circle::GetModel(contract.get_buffer().data())->UnPack()); + ASSERT_NE(model.get(), nullptr); + ASSERT_EQ(model->metadata.size(), 0); +} diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 1df569d..6630cab 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_import ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_import ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_import PRIVATE src) target_include_directories(luci_import PUBLIC include) target_link_libraries(luci_import PUBLIC luci_lang) @@ -33,3 +33,6 @@ GTest_AddTest(luci_import_test ${TESTS}) target_include_directories(luci_import_test PRIVATE src) target_link_libraries(luci_import_test luci_import) target_link_libraries(luci_import_test oops) +target_link_libraries(luci_import_test luci_plan) +target_link_libraries(luci_import_test luci_lang) +target_link_libraries(luci_import_test mio_circle) diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h index b9697fb..fb38ba9 100644 --- a/compiler/luci/import/include/luci/Import/CircleReader.h +++ b/compiler/luci/import/include/luci/Import/CircleReader.h @@ -36,10 +36,19 @@ namespace luci { bool is_valid(const circle::OperatorCodeT &opcode); +bool is_valid(const circle::OperatorCode *opcode); + bool is_custom(const circle::OperatorCodeT &opcode); +bool is_custom(const circle::OperatorCode *opcode); + std::string opcode_name(const circle::OperatorCodeT &opcode); +std::string opcode_name(const circle::OperatorCode *opcode); + const char *tensor_name(const 
circle::TensorT &tensor); +const char *tensor_name(const circle::Tensor *tensor); + const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor); +const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor); loco::DataType luci_datatype(circle::TensorType type); FusedActFunc luci_actfunc(const circle::ActivationFunctionType type); @@ -49,29 +58,70 @@ luci::CircleFullyConnected::WeightsFormat luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format); std::unique_ptr luci_quantparam(const circle::QuantizationParametersT *quantization); +std::unique_ptr +luci_quantparam(const circle::QuantizationParameters *quantization); /// @brief Copy common tensor attributes such as name, type, etc. to node. void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node); +void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node); + +/** + * @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity + */ +template class VectorWrapper +{ +public: + explicit VectorWrapper(const flatbuffers::Vector *ptr); + + const T *data() const; + uint32_t size() const; + + using iterator = typename flatbuffers::Vector::const_iterator; + iterator begin() const; + iterator end() const; + + using value_type = typename flatbuffers::Vector::return_type; + value_type at(uint32_t i) const; + value_type operator[](uint32_t i) const; + + bool null() const; + bool empty() const; + +private: + const flatbuffers::Vector *_vector; +}; + +template VectorWrapper wrap(const flatbuffers::Vector *vec) +{ + return VectorWrapper(vec); +} /** * @brief Loads Circle file and provides helpers to access attributes */ class CircleReader { -private: +private: // unpack API using CircleBuffers_t = std::vector>; using CircleTensors_t = std::vector>; using CircleOperators_t = std::vector>; using CircleOperatorCodes_t = std::vector>; using CircleMetadata_t = std::vector>; +private: // direct API + 
using CircleBuffers = VectorWrapper>; + using CircleTensors = VectorWrapper>; + using CircleOperators = VectorWrapper>; + using CircleOperatorCodes = VectorWrapper>; + using CircleMetadataSet = VectorWrapper>; + using CircleSubGraphsPtr_t = flatbuffers::Vector>; using CircleTensorsPtr_t = flatbuffers::Vector>; public: CircleReader() = default; -public: +public: // unpack API const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; } const CircleBuffers_t &buffers() const { return _model->buffers; } const CircleTensors_t &tensors() const { return _current_subgraph->tensors; } @@ -89,6 +139,20 @@ public: circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const; std::string opcode_name(const circle::OperatorT &op) const; +public: // direct API + CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); } + CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); } + CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); } + CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); } + VectorWrapper native_inputs() const { return wrap(_native_subgraph->inputs()); } + VectorWrapper native_outputs() const { return wrap(_native_subgraph->outputs()); } + std::string native_name() const { return _native_subgraph->name()->str(); } + circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); } + CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); } + + circle::BuiltinOperator builtin_code(const circle::Operator *op) const; + std::string opcode_name(const circle::Operator *op) const; + public: bool parse(const circle::Model *model); bool select_subgraph(uint32_t subgraph); @@ -97,8 +161,9 @@ private: std::unique_ptr _model; const circle::SubGraphT *_current_subgraph{nullptr}; - const circle::Model *_model_ptr{nullptr}; + const circle::Model *_native_model{nullptr}; const 
CircleTensorsPtr_t *_tensors_ptr{nullptr}; + const circle::SubGraph *_native_subgraph{nullptr}; }; } // namespace luci diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 6c9bf3a..14917ba 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -29,12 +29,26 @@ bool is_valid(const circle::OperatorCodeT &opcode) return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); } +bool is_valid(const circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + circle::BuiltinOperator code = opcode->builtin_code(); + return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX); +} + bool is_custom(const circle::OperatorCodeT &opcode) { circle::BuiltinOperator code = opcode.builtin_code; return (code == circle::BuiltinOperator_CUSTOM); } +bool is_custom(const circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + circle::BuiltinOperator code = opcode->builtin_code(); + return (code == circle::BuiltinOperator_CUSTOM); +} + std::string opcode_name(const circle::OperatorCodeT &opcode) { if (!is_valid(opcode)) @@ -56,6 +70,30 @@ std::string opcode_name(const circle::OperatorCodeT &opcode) return circle::EnumNameBuiltinOperator(code); } +std::string opcode_name(const circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + auto custom_code = opcode->custom_code()->str(); + if (custom_code.empty()) + return "(invalid custom)"; + + return custom_code; + } + + circle::BuiltinOperator code = opcode->builtin_code(); + return circle::EnumNameBuiltinOperator(code); +} + const char *tensor_name(const circle::TensorT &tensor) { static const char *kEmptyTensorName = "(noname)"; @@ -66,11 +104,30 @@ const char *tensor_name(const circle::TensorT &tensor) return kEmptyTensorName; } +const char 
*tensor_name(const circle::Tensor *tensor) +{ + assert(tensor != nullptr); + + static const char *kEmptyTensorName = "(noname)"; + const auto tensor_name = tensor->name()->c_str(); + + if (!std::string(tensor_name).empty()) + return tensor_name; + + return kEmptyTensorName; +} + const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor) { return tensor.quantization.get(); } +const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor) +{ + assert(tensor != nullptr); + return tensor->quantization(); +} + loco::DataType luci_datatype(const circle::TensorType type) { switch (type) @@ -235,6 +292,16 @@ luci_quantparam(const circle::QuantizationParametersT *quantization) return nullptr; } +std::unique_ptr luci_quantparam(const circle::QuantizationParameters *qparams) +{ + // create temporary unpacked API object + assert(qparams != nullptr); + circle::QuantizationParametersT quantization; + qparams->UnPackTo(&quantization); + + return luci_quantparam(&quantization); +} + std::unique_ptr luci_sparsityparam(const circle::SparsityParametersT *sparsity) { assert(sparsity); @@ -257,6 +324,16 @@ std::unique_ptr luci_sparsityparam(const circle::SparsityParamete return sparsityparam; } +std::unique_ptr luci_sparsityparam(const circle::SparsityParameters *sparparam) +{ + // create temporary unpacked API object + assert(sparparam != nullptr); + circle::SparsityParametersT sparsity; + sparparam->UnPackTo(&sparsity); + + return luci_sparsityparam(&sparsity); +} + void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node) { node->name(tensor_name(tensor)); @@ -292,6 +369,45 @@ void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node) } } +void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node) +{ + assert(tensor != nullptr); + + node->name(tensor_name(tensor)); + node->dtype(luci_datatype(tensor->type())); + + const auto tensor_shape_signature = 
wrap(tensor->shape_signature()); + const auto tensor_shape = wrap(tensor->shape()); + assert(tensor_shape_signature.size() == 0 || + tensor_shape_signature.size() == tensor_shape.size()); + + const auto dims = tensor_shape; // in NHWC + node->rank(dims.size()); + for (uint32_t r = 0; r < dims.size(); ++r) + { + if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1) + node->dim(r).unset(); + else + node->dim(r).set(dims[r]); + } + + const auto quantization = tensor->quantization(); + if (quantization != nullptr) + { + auto quantparam = luci_quantparam(quantization); + if (quantparam) + node->quantparam(std::move(quantparam)); + } + + const auto sparsity = tensor->sparsity(); + if (sparsity != nullptr) + { + auto sparsityparam = luci_sparsityparam(sparsity); + if (sparsityparam) + node->sparsityparam(std::move(sparsityparam)); + } +} + circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const { const auto &op_codes = opcodes(); @@ -326,7 +442,7 @@ bool CircleReader::parse(const circle::Model *model) _model.reset(model->UnPack()); // for direct pointer access - _model_ptr = model; + _native_model = model; return true; } @@ -342,12 +458,72 @@ bool CircleReader::select_subgraph(uint32_t sgindex) _current_subgraph = _model->subgraphs[sgindex].get(); // for direct pointer access - auto subgraphs = _model_ptr->subgraphs(); - const circle::SubGraph *subgraph = (*subgraphs)[sgindex]; + auto subgraphs = _native_model->subgraphs(); + assert(subgraphs != nullptr); + + _native_subgraph = subgraphs->Get(sgindex); + assert(_native_subgraph != nullptr); - _tensors_ptr = subgraph->tensors(); + _tensors_ptr = _native_subgraph->tensors(); return true; } +template +VectorWrapper::VectorWrapper(const flatbuffers::Vector *ptr) : _vector(ptr) +{ + // Do nothing +} + +template uint32_t VectorWrapper::size() const +{ + return null() ? 0 : _vector->size(); +} + +template const T *VectorWrapper::data() const +{ + return null() ? 
nullptr : _vector->data(); +} + +template typename VectorWrapper::iterator VectorWrapper::begin() const +{ + return null() ? iterator(nullptr, 0) : _vector->begin(); +} + +template typename VectorWrapper::iterator VectorWrapper::end() const +{ + return null() ? begin() : _vector->end(); +} + +template typename VectorWrapper::value_type VectorWrapper::at(uint32_t i) const +{ + if (i >= size()) + { + // TODO find better error message + throw std::range_error("Access to prohibited vector element"); + } + + return _vector->Get(i); +} + +template +typename VectorWrapper::value_type VectorWrapper::operator[](uint32_t i) const +{ + return at(i); +} + +template bool VectorWrapper::null() const { return _vector == nullptr; } +template bool VectorWrapper::empty() const { return size() == 0; } + +#define REGISTER_WRAPPER(T) template class VectorWrapper +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(flatbuffers::Offset); +REGISTER_WRAPPER(int32_t); +REGISTER_WRAPPER(uint8_t); +#undef REGISTER_WRAPPER + } // namespace luci diff --git a/compiler/luci/import/src/CircleReader.test.cpp b/compiler/luci/import/src/CircleReader.test.cpp new file mode 100644 index 0000000..0ce5b60 --- /dev/null +++ b/compiler/luci/import/src/CircleReader.test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Import/CircleReader.h" + +#include + +TEST(VectorWrapperTest, basic_pattern) +{ + auto fb_builder = flatbuffers::FlatBufferBuilder(); + + const std::vector data = {1, 4, 2, 0, 7}; + auto const vec_offset = fb_builder.CreateVector(data.data(), data.size()); + auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset); + + auto const wrapper = luci::wrap(vec_pointer); + + ASSERT_EQ(wrapper.size(), data.size()); + ASSERT_TRUE(std::equal(wrapper.begin(), wrapper.end(), data.begin())); +} + +TEST(VectorWrapperTest, wrong_data_NEG) +{ + auto fb_builder = flatbuffers::FlatBufferBuilder(); + + std::vector data = {1, 4, 2, 0, 7}; + auto const vec_offset = fb_builder.CreateVector(data.data(), data.size()); + auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset); + + auto const wrapper = luci::wrap(vec_pointer); + + // change data + std::reverse(data.begin(), data.end()); + + ASSERT_EQ(wrapper.size(), data.size()); + ASSERT_FALSE(std::equal(wrapper.begin(), wrapper.end(), data.begin())); +} + +TEST(VectorWrapperTest, null_pointer) +{ + flatbuffers::Vector *vec_pointer = nullptr; + auto const wrapper = luci::wrap(vec_pointer); + + ASSERT_TRUE(wrapper.null()); + ASSERT_TRUE(wrapper.empty()); +} + +TEST(VectorWrapperTest, prohibited_access_NEG) +{ + flatbuffers::Vector *vec_pointer = nullptr; + auto const wrapper = luci::wrap(vec_pointer); + + ASSERT_ANY_THROW(wrapper.at(0)); +} diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp index 8eae5fc..3f7f785 100644 --- a/compiler/luci/import/src/Importer.cpp +++ b/compiler/luci/import/src/Importer.cpp @@ -119,6 +119,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r } // Create CircleConst nodes for constant tensors. + // NOTE Origin is intentionally not provided for constants. 
for (uint32_t i = 0; i < tensors.size(); ++i) { luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i); diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp index 8366546..d963b4d 100644 --- a/compiler/luci/import/src/Importer.test.cpp +++ b/compiler/luci/import/src/Importer.test.cpp @@ -16,9 +16,12 @@ #include "luci/Importer.h" -#include +#include +#include #include +#include +#include TEST(TensorFlowLiteImport, Dummy) { @@ -26,3 +29,283 @@ TEST(TensorFlowLiteImport, Dummy) SUCCEED(); } + +// helpers for flatbuffers +namespace +{ + +struct BasicCircleModel +{ + std::unique_ptr model; + + BasicCircleModel() + { + model = std::make_unique(); + model->buffers.push_back(std::make_unique()); + model->description = "nnpackage"; + model->version = 0; + } + + uint32_t add_subgraph() + { + model->subgraphs.push_back(std::make_unique()); + model->subgraphs.back()->name = ""; + model->subgraphs.back()->data_format = circle::DataFormat_CHANNELS_LAST; + return model->subgraphs.size() - 1; + } + + void add_subgraph_inputs(uint32_t subgraph_id, const std::vector &inputs) + { + model->subgraphs[subgraph_id]->inputs.assign(inputs.begin(), inputs.end()); + } + + void add_subgraph_outputs(uint32_t subgraph_id, const std::vector &outputs) + { + model->subgraphs[subgraph_id]->outputs.assign(outputs.begin(), outputs.end()); + } + + uint32_t add_builtin_opcode(circle::BuiltinOperator opcode) + { + uint32_t id = model->operator_codes.size(); + model->operator_codes.push_back(std::make_unique()); + model->operator_codes[id]->builtin_code = opcode; + model->operator_codes[id]->version = 1; + return id; + } + + uint32_t add_buffer() + { + model->buffers.push_back(std::make_unique()); + return model->buffers.size() - 1; + } + + uint32_t add_float_tensor(uint32_t graph_id, const std::vector &shape, + uint32_t buffer_id) + { + auto &graph = model->subgraphs[graph_id]; + uint32_t idx = graph->tensors.size(); + 
graph->tensors.push_back(std::make_unique()); + graph->tensors[idx]->shape = shape; + graph->tensors[idx]->type = circle::TensorType_FLOAT32; + graph->tensors[idx]->buffer = buffer_id; + graph->tensors[idx]->name = std::to_string(idx); + graph->tensors[idx]->quantization.reset(nullptr); + graph->tensors[idx]->is_variable = false; + graph->tensors[idx]->sparsity.reset(nullptr); + (void)graph->tensors[idx]->shape_signature; + return idx; + } + + uint32_t add_builtin_operator(uint32_t graph_id, uint32_t opcode_id, + const std::vector &inputs, + const std::vector &outputs) + { + auto &graph = model->subgraphs[graph_id]; + auto idx = graph->operators.size(); + graph->operators.push_back(std::make_unique()); + graph->operators[idx]->opcode_index = opcode_id; + graph->operators[idx]->inputs.assign(inputs.begin(), inputs.end()); + graph->operators[idx]->outputs.assign(outputs.begin(), outputs.end()); + graph->operators[idx]->builtin_options.Reset(); + (void)graph->operators[idx]->custom_options; + graph->operators[idx]->custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS; + (void)graph->operators[idx]->mutating_variable_inputs; + (void)graph->operators[idx]->intermediates; + return idx; + } + + uint32_t add_plan_metadata(uint32_t buffer_id) + { + static_assert(sizeof(uint32_t) == 4, "metadata is stored in blocks of 32 bit unsiged ints"); + uint32_t idx = model->metadata.size(); + model->metadata.push_back(std::make_unique()); + model->metadata[idx]->name = "ONE_execution_plan_table"; + model->metadata[idx]->buffer = buffer_id; + model->buffers[buffer_id]->data.resize(4); + auto &entries_count = *reinterpret_cast(model->buffers[buffer_id]->data.data()); + entries_count = 0; + return idx; + } + + void add_plan_entry(uint32_t plan_buffer_id, uint32_t execution_order, + const std::vector &offsets) + { + auto &buffer = model->buffers[plan_buffer_id]->data; + auto old_size = buffer.size(); + assert(old_size % 4 == 0); + assert(old_size > 0); + + // Allocate space for 
new entry: + // 4 bytes for entry id + // 4 bytes for entry size + // 4 bytes for execution order + // offsets.size() * 4 bytes for offsets + buffer.resize(old_size + 12 + offsets.size() * 4); + uint32_t *number_of_entries_ptr = reinterpret_cast(buffer.data()); + *number_of_entries_ptr += 1; + + uint32_t *entry_data_ptr = reinterpret_cast(buffer.data() + old_size); + + entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id + entry_data_ptr[1] = 1 + offsets.size(); // entry size + entry_data_ptr[2] = execution_order; // execution order + std::copy(offsets.begin(), offsets.end(), entry_data_ptr + 3); + } +}; + +struct SimpleRELUModel : public BasicCircleModel +{ + SimpleRELUModel() + { + auto relu_opcode_id = add_builtin_opcode(circle::BuiltinOperator_RELU); + + uint32_t subgraph_id = add_subgraph(); + + auto input_buffer_id = add_buffer(); + auto output_buffer_id = add_buffer(); + + auto input_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, input_buffer_id); + auto output_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, output_buffer_id); + + add_subgraph_inputs(subgraph_id, {input_tensor_idx}); + add_subgraph_outputs(subgraph_id, {output_tensor_idx}); + + add_builtin_operator(subgraph_id, relu_opcode_id, {0}, {1}); + } +}; + +} // namespace + +/** + * This test checks that one op RELU model with execution plan is successfully imported + */ +TEST(TensorFlowLiteImport, simple_plan) +{ + SimpleRELUModel model; + auto metadata_buffer_id = model.add_buffer(); + model.add_plan_metadata(metadata_buffer_id); + + model.add_plan_entry(metadata_buffer_id, 1, {100}); + model.add_plan_entry(metadata_buffer_id, 2, {300}); + model.add_plan_entry(metadata_buffer_id, 3, {200}); + + flatbuffers::FlatBufferBuilder fbb; + auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr); + circle::FinishModelBuffer(fbb, model_offset); + + auto model_ptr = circle::GetModel(fbb.GetBufferPointer()); + luci::Importer import; + + auto luci_module = 
import.importModule(model_ptr); + + auto main_graph = luci_module->graph(); + for (int i = 0; i < main_graph->nodes()->size(); ++i) + { + auto node = loco::must_cast(main_graph->nodes()->at(i)); + switch (node->opcode()) + { + case luci::CircleOpcode::CIRCLEINPUT: + { + ASSERT_TRUE(luci::has_execution_plan(node)); + auto plan = luci::get_execution_plan(node); + ASSERT_EQ(plan.order_in_plan(), 1); + ASSERT_EQ(plan.offsets().size(), 1); + ASSERT_EQ(plan.offsets()[0], 100); + break; + } + case luci::CircleOpcode::CIRCLEOUTPUT: + { + ASSERT_TRUE(luci::has_execution_plan(node)); + auto plan = luci::get_execution_plan(node); + ASSERT_EQ(plan.order_in_plan(), 3); + ASSERT_EQ(plan.offsets().size(), 1); + ASSERT_EQ(plan.offsets()[0], 200); + break; + } + case luci::CircleOpcode::RELU: + { + ASSERT_TRUE(luci::has_execution_plan(node)); + auto plan = luci::get_execution_plan(node); + ASSERT_EQ(plan.order_in_plan(), 2); + ASSERT_EQ(plan.offsets().size(), 1); + ASSERT_EQ(plan.offsets()[0], 300); + break; + } + default: + FAIL(); + } + } +} + +/** + * This test checks that model with incomplete execution plan is successfully imported + */ +TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG) +{ + SimpleRELUModel model; + auto metadata_buffer_id = model.add_buffer(); + model.add_plan_metadata(metadata_buffer_id); + + model.add_plan_entry(metadata_buffer_id, 1, {100}); + + flatbuffers::FlatBufferBuilder fbb; + auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr); + circle::FinishModelBuffer(fbb, model_offset); + + auto model_ptr = circle::GetModel(fbb.GetBufferPointer()); + luci::Importer import; + + auto luci_module = import.importModule(model_ptr); + + auto main_graph = luci_module->graph(); + for (int i = 0; i < main_graph->nodes()->size(); ++i) + { + auto node = loco::must_cast(main_graph->nodes()->at(i)); + switch (node->opcode()) + { + case luci::CircleOpcode::CIRCLEINPUT: + { + ASSERT_TRUE(luci::has_execution_plan(node)); + auto plan = 
luci::get_execution_plan(node); + ASSERT_EQ(plan.order_in_plan(), 1); + ASSERT_EQ(plan.offsets().size(), 1); + ASSERT_EQ(plan.offsets()[0], 100); + break; + } + case luci::CircleOpcode::CIRCLEOUTPUT: + case luci::CircleOpcode::RELU: + { + ASSERT_FALSE(luci::has_execution_plan(node)); + break; + } + default: + FAIL(); + } + } +} + +/** + * This test checks that corrupted execution plan induce exception + */ +TEST(TensorFlowLiteImport, corrupted_plan_NEG) +{ + SimpleRELUModel model; + auto metadata_buffer_id = model.add_buffer(); + model.add_plan_metadata(metadata_buffer_id); + + model.add_plan_entry(metadata_buffer_id, 1, {100}); + model.add_plan_entry(metadata_buffer_id, 2, {300}); + model.add_plan_entry(metadata_buffer_id, 3, {200}); + + // corrupt data + *reinterpret_cast(model.model->buffers[metadata_buffer_id]->data.data()) = 4; + + flatbuffers::FlatBufferBuilder fbb; + auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr); + circle::FinishModelBuffer(fbb, model_offset); + + auto model_ptr = circle::GetModel(fbb.GetBufferPointer()); + luci::Importer import; + + ASSERT_ANY_THROW(import.importModule(model_ptr)); +} diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt index 433b7cd..2f6ee23 100644 --- a/compiler/luci/lang/CMakeLists.txt +++ b/compiler/luci/lang/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_lang ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_lang PRIVATE src) target_include_directories(luci_lang PUBLIC include) target_link_libraries(luci_lang PUBLIC loco) diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt index 
b64a065..bbd733f 100644 --- a/compiler/luci/log/CMakeLists.txt +++ b/compiler/luci/log/CMakeLists.txt @@ -1,11 +1,11 @@ # TODO Find how to test logging framework file(GLOB_RECURSE SOURCES "src/*.cpp") -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_log ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_log ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_log PUBLIC include) target_link_libraries(luci_log PUBLIC hermes) target_link_libraries(luci_log PRIVATE hermes_std) diff --git a/compiler/luci/log/include/luci/Log.h b/compiler/luci/log/include/luci/Log.h index e148810..ddb34f4 100644 --- a/compiler/luci/log/include/luci/Log.h +++ b/compiler/luci/log/include/luci/Log.h @@ -48,7 +48,6 @@ public: private: bool _show_warn = true; - bool _show_info = false; int _show_verbose = 0; }; @@ -67,8 +66,8 @@ private: #define LOGGER(name) ::luci::Logger name{::luci::LoggingContext::get()}; // TODO Support FATAL, ERROR -#define INFO(name) HERMES_INFO(name) -#define WARN(name) HERMES_WARN(name) +#define INFO(name) HERMES_VERBOSE(name, 3) +#define WARN(name) HERMES_VERBOSE(name, 2) #define VERBOSE(name, lv) HERMES_VERBOSE(name, lv) // WARNING! diff --git a/compiler/luci/log/src/Log.cpp b/compiler/luci/log/src/Log.cpp index c26bf30..0cc45e8 100644 --- a/compiler/luci/log/src/Log.cpp +++ b/compiler/luci/log/src/Log.cpp @@ -33,11 +33,6 @@ namespace */ template T safecast(const char *, const T &); -template <> bool safecast(const char *s, const bool &value) -{ - return (s == nullptr) ? value : (std::stoi(s) != 0); -} - template <> int safecast(const char *s, const int &value) { return (s == nullptr) ? 
value : std::stoi(s); @@ -68,9 +63,6 @@ LoggerConfig::LoggerConfig() _show_warn = !settings->get(luci::UserSettings::Key::MuteWarnings); - // Turn on info logging if LUCI_LOG is set as non-zero value - _show_info = safecast(std::getenv("LUCI_LOG"), false); - // Turn on verbose logging if LUCI_LOG is set to some level // VERBOSE(l, 1) will be visible with LUCI_LOG=2 and VERBOSE(l, 2) with LUCI_LOG=3 and so on _show_verbose = safecast(std::getenv("LUCI_LOG"), 0); @@ -87,6 +79,8 @@ void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setti void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const { + // TODO remove deprecated codes +#if 0 setting.filter(hermes::SeverityCategory::FATAL).reject_all(); setting.filter(hermes::SeverityCategory::ERROR).reject_all(); setting.filter(hermes::SeverityCategory::WARN).reject_all(); @@ -106,6 +100,16 @@ void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) c { setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose); } +#endif + setting.reject_all(); + setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose); + setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose); + if (_show_warn) + { + setting.filter(hermes::SeverityCategory::WARN).accept_upto(_show_verbose); + } + setting.filter(hermes::SeverityCategory::INFO).accept_upto(_show_verbose); + setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose); } } // namespace luci diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt index 4d801b0..aed9fb7 100644 --- a/compiler/luci/logex/CMakeLists.txt +++ b/compiler/luci/logex/CMakeLists.txt @@ -1,11 +1,11 @@ # TODO Find how to test logging-ex utility file(GLOB_RECURSE SOURCES "src/*.cpp") -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) 
-add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_logex ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_logex PUBLIC include) target_link_libraries(luci_logex PUBLIC loco) target_link_libraries(luci_logex PUBLIC locop) diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt index eacbe1c..ec8e0b0 100644 --- a/compiler/luci/partition/CMakeLists.txt +++ b/compiler/luci/partition/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_partition ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_partition ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_partition PRIVATE src) target_include_directories(luci_partition PUBLIC include) target_link_libraries(luci_partition PUBLIC luci_lang) diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp index b767c77..c517bf9 100644 --- a/compiler/luci/partition/src/PartitionMerge.cpp +++ b/compiler/luci/partition/src/PartitionMerge.cpp @@ -58,6 +58,9 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups) // we need to clone this CircleConst for each graph of the group. if (dynamic_cast(input) != nullptr) continue; + // Skip also for OutputExclude + if (dynamic_cast(input) != nullptr) + continue; auto input_group = pgroups->group_of(input); // NOTE: all the nodes should be registered and return should be valid group. 
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp index e0b4e8e..0080873 100644 --- a/compiler/luci/partition/src/PartitionPGroups.cpp +++ b/compiler/luci/partition/src/PartitionPGroups.cpp @@ -35,6 +35,17 @@ class IsVirtualNode final : public luci::CircleNodeVisitor public: bool visit(const luci::CircleInput *) final { return true; } bool visit(const luci::CircleOutput *) final { return true; } + // For multiple outputs + bool visit(const luci::CircleCustomOut *) final { return true; } + bool visit(const luci::CircleIfOut *) final { return true; } + bool visit(const luci::CircleNonMaxSuppressionV4Out *) final { return true; } + bool visit(const luci::CircleNonMaxSuppressionV5Out *) final { return true; } + bool visit(const luci::CircleSplitOut *) final { return true; } + bool visit(const luci::CircleSplitVOut *) final { return true; } + bool visit(const luci::CircleTopKV2Out *) final { return true; } + bool visit(const luci::CircleUniqueOut *) final { return true; } + bool visit(const luci::CircleUnpackOut *) final { return true; } + bool visit(const luci::CircleWhileOut *) final { return true; } // TODO add all virtual nodes // default is false @@ -58,6 +69,91 @@ bool check_allocate_partition(const luci::CircleNode *node) return true; } +class FindGroupToFollow final : public luci::CircleNodeVisitor +{ +public: + FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups) + : _partition(partition), _pgroups(pgroups) + { + // NOTHING TODO + } + +private: + const std::string &groupof(const luci::CircleNode *input) const + { + auto group = _pgroups->node2group[input]; + assert(not group.empty()); + if (group.empty()) + return _partition.default_group; + return _pgroups->node2group[input]; + } + +public: +#define IMPLEMENT(CLASS) \ + const std::string &visit(const luci::CLASS *node) final \ + { \ + auto input = loco::must_cast(node->input()); \ + return groupof(input); \ + } + + 
IMPLEMENT(CircleCustomOut); + IMPLEMENT(CircleIfOut); + IMPLEMENT(CircleNonMaxSuppressionV4Out); + IMPLEMENT(CircleNonMaxSuppressionV5Out); + IMPLEMENT(CircleSplitOut); + IMPLEMENT(CircleSplitVOut); + IMPLEMENT(CircleTopKV2Out); + IMPLEMENT(CircleUniqueOut); + IMPLEMENT(CircleUnpackOut); + IMPLEMENT(CircleWhileOut); + +#undef IMPLEMENT + + // return empty for nothing to do + const std::string &visit(const luci::CircleNode *) final { return _empty_str; } + +private: + const luci::PartitionTable &_partition; + luci::PGroups *_pgroups = nullptr; + std::string _empty_str; +}; + +} // namespace + +namespace +{ + +void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx) +{ + auto pgroup = std::make_unique(); + pgroup->group = group; + pgroup->id = idx + 1; + + auto pnode = std::make_unique(); + pnode->node = node; + pnode->group = group; + pnode->pgroup = pgroup.get(); + + pgroup->pnodes.push_back(std::move(pnode)); + + // Set input of PGroup + for (uint32_t in = 0; in < node->arity(); ++in) + { + auto input = loco::must_cast(node->arg(in)); + // this input maybe CircleInput in source graph + // --> not confident this is safe + pgroup->inputs.push_back(input); + } + // Set output of PGroup: node itself or multiple virtual outputs + // TODO support multiple virtual outputs + pgroup->outputs.push_back(node); + + pgroups->node2group[node] = group; + pgroups->id2pgroup[pgroup->id] = pgroup.get(); + + pgroups->pgroups.push_back(std::move(pgroup)); +} + } // namespace namespace luci @@ -120,6 +216,8 @@ std::unique_ptr produce_pgroups(const luci::Module *source, INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group << std::endl; + append(node, pgroups.get(), group, idx); +#if 0 auto pgroup = std::make_unique(); pgroup->group = group; pgroup->id = idx + 1; @@ -147,6 +245,7 @@ std::unique_ptr produce_pgroups(const luci::Module *source, pgroups->id2pgroup[pgroup->id] = pgroup.get(); 
pgroups->pgroups.push_back(std::move(pgroup)); +#endif } else { @@ -156,6 +255,22 @@ std::unique_ptr produce_pgroups(const luci::Module *source, } } + // handle for virtual nodes like multiple outputs + // these nodes should follow group of the input + for (uint32_t idx = 0; idx < nodes->size(); ++idx) + { + auto node = loco::must_cast(nodes->at(idx)); + + // for virtual nodes like CircleUnpackOut should follow it's input (owner) + // or just set to default + FindGroupToFollow query(partition, pgroups.get()); + const auto &group = node->accept(&query); + if (not group.empty()) + { + append(node, pgroups.get(), group, idx); + } + } + return std::move(pgroups); } diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt index 2361bb4..b8b406a 100644 --- a/compiler/luci/pass/CMakeLists.txt +++ b/compiler/luci/pass/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +nnas_find_package(FlatBuffers EXACT 1.12 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "FlatBuffers NOT FOUND") return() @@ -8,11 +8,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_pass PRIVATE src) target_include_directories(luci_pass PUBLIC include) target_link_libraries(luci_pass PUBLIC loco) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 917caca..658563e 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -93,6 +93,8 @@ public: Quantize_tensor_names, Quantize_scales, Quantize_zero_points, + Quantize_input_type, + 
Quantize_output_type, // sparsify Sparsify_tensor_name, @@ -104,9 +106,6 @@ public: // convert NCHW to NHWC NCHW_to_NHWC_input_shape, NCHW_to_NHWC_output_shape, - - Quantize_input_dtype = Quantize_input_model_dtype, // TODO Remove this - Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this }; virtual ~Options() = default; diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h index d618a07..648abad 100644 --- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h @@ -31,11 +31,23 @@ namespace luci */ class QuantizeWithMinMaxPass : public logo::Pass { + // For backward-compatibility + // TODO Remove this constructor public: QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, QuantizationGranularity granularity) - : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{ - granularity} + : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, + _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype} + { + // DO NOTHING + } + +public: + QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, + QuantizationGranularity granularity, loco::DataType input_type, + loco::DataType output_type) + : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, + _granularity{granularity}, _input_type{input_type}, _output_type{output_type} { // DO NOTHING } @@ -45,9 +57,15 @@ public: bool run(loco::Graph *graph); private: + void set_input_type(loco::Graph *graph) const; + void set_output_type(loco::Graph *graph) const; + +private: loco::DataType _input_model_dtype; loco::DataType _output_model_dtype; QuantizationGranularity _granularity; + loco::DataType _input_type; + loco::DataType _output_type; }; } // 
namespace luci diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 5d0c926..75f04b3 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -468,12 +468,20 @@ void CircleOptimizer::quantize(loco::Graph *g) const static const std::vector qwmm_supported_input_model_dtype{"float32"}; static const std::vector qwmm_supported_output_model_dtype{"uint8", "int16"}; static const std::vector qwmm_supported_granularity{"layer", "channel"}; + static const std::vector qwmm_supported_input_type{"uint8", "int16"}; + static const std::vector qwmm_supported_output_type{"uint8", "int16"}; auto input_model_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); auto output_model_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); + auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type); + if (input_type.empty()) + input_type = output_model_dtype; + auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type); + if (output_type.empty()) + output_type = output_model_dtype; if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype)) throw std::runtime_error("Unsupported input type. List of supported input types: " + @@ -487,13 +495,21 @@ void CircleOptimizer::quantize(loco::Graph *g) const throw std::runtime_error("Unsupported granularity. List of supported granularity: " + to_string(qwmm_supported_granularity)); + if (!in_array(to_lower_case(input_type), qwmm_supported_input_type)) + throw std::runtime_error("Unsupported input type. List of supported input types: " + + to_string(qwmm_supported_input_type)); + + if (!in_array(to_lower_case(output_type), qwmm_supported_output_type)) + throw std::runtime_error("Unsupported output type. 
List of supported output types: " + + to_string(qwmm_supported_output_type)); + if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && str_to_dtype(output_model_dtype) != loco::DataType::U8) throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); - luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype), - str_to_dtype(output_model_dtype), - str_to_granularity(granularity)); + luci::QuantizeWithMinMaxPass quantizer( + str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype), + str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type)); quantizer.run(g); // Post-quantization optimizations diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp index 66e3415..d83973c 100644 --- a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp +++ b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp @@ -72,13 +72,6 @@ bool fuse_activation_function(luci::CircleNode *node) else return false; } - else if (opcode == luci::CircleOpcode::TANH) - { - if (fused_act == luci::FusedActFunc::NONE) - target_func = luci::FusedActFunc::TANH; - else - return false; - } else return false; @@ -98,8 +91,9 @@ bool FuseActivationFunctionPass::run(loco::Graph *g) { auto circle_node = static_cast(node); auto opcode = circle_node->opcode(); + // TANH is not supported as CONV fused with TANH is not supported in luci-interpreter if (opcode == luci::CircleOpcode::RELU || opcode == luci::CircleOpcode::RELU6 || - opcode == luci::CircleOpcode::RELU_N1_TO_1 || opcode == luci::CircleOpcode::TANH) + opcode == luci::CircleOpcode::RELU_N1_TO_1) { if (fuse_activation_function(circle_node)) changed = true; diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp index 56b4141..9e0a80d 100644 --- a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp +++ 
b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp @@ -86,6 +86,47 @@ protected: luci::CircleConst *_conv2_b = nullptr; }; +class ConvTanhConvGraphlet +{ +public: + ConvTanhConvGraphlet() = default; + + void init(loco::Graph *g) + { + _conv1 = g->nodes()->create(); + _conv2 = g->nodes()->create(); + _tanh = g->nodes()->create(); + _conv1_f = g->nodes()->create(); + _conv1_b = g->nodes()->create(); + _conv2_f = g->nodes()->create(); + _conv2_b = g->nodes()->create(); + + _conv1->fusedActivationFunction(luci::FusedActFunc::NONE); + + _conv1->name("conv1"); + _conv2->name("conv2"); + _tanh->name("tanh"); + _conv1_f->name("conv1f"); + _conv1_b->name("conv1b"); + _conv2_f->name("conv2f"); + _conv2_b->name("conv2b"); + } + +public: + luci::CircleTanh *tanh() { return _tanh; } + luci::CircleConv2D *conv1() { return _conv1; } + luci::CircleConv2D *conv2() { return _conv2; } + +protected: + luci::CircleConv2D *_conv1 = nullptr; + luci::CircleConv2D *_conv2 = nullptr; + luci::CircleTanh *_tanh = nullptr; + luci::CircleConst *_conv1_f = nullptr; + luci::CircleConst *_conv1_b = nullptr; + luci::CircleConst *_conv2_f = nullptr; + luci::CircleConst *_conv2_b = nullptr; +}; + class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet { public: @@ -110,6 +151,30 @@ public: } }; +class FuseTanhActTestGraph : public TestIOGraph, public ConvTanhConvGraphlet +{ +public: + FuseTanhActTestGraph() = default; + + void init(void) + { + TestIOGraph::init({1}, {1}); + ConvTanhConvGraphlet::init(g()); + + _conv1->input(input()); + _conv1->filter(_conv1_f); + _conv1->bias(_conv1_b); + + _tanh->x(_conv1); + + _conv2->input(_tanh); + _conv2->filter(_conv2_f); + _conv2->bias(_conv2_b); + + output()->from(_conv2); + } +}; + class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet { public: @@ -190,3 +255,15 @@ TEST(FusePreActivationBatchNorm, fuse_activation_function_tanh_NEG) // Relu input Conv2D already has activation function 
EXPECT_FALSE(pass.run(g.g())); } + +TEST(FusePreActivationBatchNorm, fuse_tanh_NEG) +{ + FuseTanhActTestGraph g; + luci::FuseActivationFunctionPass pass; + + g.init(); + + // Tanh should not be fused + // This can be changed when CONV+TANH is supported by luci-interpreter + EXPECT_FALSE(pass.run(g.g())); +} diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index be81732..c3552ec 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,52 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc } } +// Create a Quantize Op whose +// dtype is out_type +// shape is the same with node +// qparam is computed using node's min/max +luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type) +{ + auto quantize = node->graph()->nodes()->create(); + quantize->name(node->name() + "_Quantize"); + quantize->dtype(out_type); + quantize->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + quantize->dim(i).set(node->dim(i).value()); + + quantize->shape_status(luci::ShapeStatus::VALID); + + auto qparam = node->quantparam(); + assert(qparam); // FIX_CALLER_UNLESS + assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS + assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS + auto min = qparam->min[0]; + auto max = qparam->max[0]; + + float scaling_factor{0}; + int64_t zp{0}; + float nudged_min{0}; + float nudged_max{0}; + + if (out_type == loco::DataType::U8) + { + compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + } + else + { + assert(out_type == loco::DataType::S16); + compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + } + + auto quantparam = std::make_unique(); + quantparam->scale.push_back(scaling_factor); + quantparam->zerop.push_back(zp); + 
+ quantize->quantparam(std::move(quantparam)); + + return quantize; +} + } // namespace namespace luci @@ -743,8 +790,6 @@ struct QuantizeActivation final : public luci::CircleNodeMutableVisitor scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor); } - circle_node->quantparam()->min.clear(); - circle_node->quantparam()->max.clear(); circle_node->quantparam()->scale.push_back(scaling_factor); circle_node->quantparam()->zerop.push_back(zp); } @@ -1467,6 +1512,97 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant quant_input(&CirclePadV2::constant_values, 2); } +void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const +{ + auto inputs = g->inputs(); + for (auto node : loco::input_nodes(g)) + { + auto input = loco::must_cast(node); + if (input->dtype() == _input_type) + continue; + + // Bool type is not quantizable + if (input->dtype() == loco::DataType::BOOL) + continue; + + // Insert Quantize Op + auto quant_op = create_quantize_op(input, input->dtype()); + loco::replace(input).with(quant_op); + quant_op->input(input); + + // TODO Set a proper origin (Quantize should have its own Origin) + { + auto succs = loco::succs(quant_op); + assert(succs.size() > 0); + auto succ = loco::must_cast(*succs.begin()); + luci::add_origin(quant_op, luci::get_origin(succ)); + } + + // Requantize input + { + auto quantparam = input->quantparam(); + assert(quantparam); + assert(quantparam->min.size() == 1); // only support layer-wise quant + assert(quantparam->max.size() == 1); // only support layer-wise quant + auto min = quantparam->min[0]; + auto max = quantparam->max[0]; + + float scaling_factor{0}; + int64_t zp{0}; + float nudged_min{0}; + float nudged_max{0}; + + if (_input_type == loco::DataType::U8) + { + compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + } + else + { + assert(_input_type == loco::DataType::S16); + compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max); + 
} + input->dtype(_input_type); + input->quantparam()->scale[0] = scaling_factor; + input->quantparam()->zerop[0] = zp; + } + + auto graph_input = inputs->at(input->index()); + graph_input->dtype(_input_type); + } +} + +void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const +{ + auto outputs = g->outputs(); + for (auto node : loco::output_nodes(g)) + { + auto output = loco::must_cast(node); + if (output->dtype() == _output_type) + continue; + + // Bool type is not quantizable + if (output->dtype() == loco::DataType::BOOL) + continue; + + auto from = loco::must_cast(output->from()); + + // The last Op is not quantizable Op (ex: ArgMax) + if (not from->quantparam()) + continue; + + // Insert Quantize Op + auto quant_op = create_quantize_op(from, _output_type); + loco::replace(from).with(quant_op); + quant_op->input(from); + + // TODO Set a proper origin (Quantize should have its own Origin) + luci::add_origin(quant_op, luci::get_origin(from)); + + auto graph_output = outputs->at(output->index()); + graph_output->dtype(_output_type); + } +} + bool QuantizeWithMinMaxPass::run(loco::Graph *g) { LOGGER(l); @@ -1539,6 +1675,23 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) } } + // Set input type + set_input_type(g); + + // Set output type + set_output_type(g); + + // Remove min/max values + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast(node); + if (auto qparam = circle_node->quantparam()) + { + qparam->min.clear(); + qparam->max.clear(); + } + } + INFO(l) << "QuantizeWithMinMaxPass End" << std::endl; return false; // one time run } diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt index 9ca6dcb..d4c8f63 100644 --- a/compiler/luci/plan/CMakeLists.txt +++ b/compiler/luci/plan/CMakeLists.txt @@ -1,10 +1,12 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE 
"SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_plan ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_plan PRIVATE src) target_include_directories(luci_plan PUBLIC include) target_link_libraries(luci_plan PUBLIC loco) @@ -13,3 +15,12 @@ target_link_libraries(luci_plan PUBLIC luci_lang) install(TARGETS luci_plan DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(luci_plan_test ${TESTS}) +target_link_libraries(luci_plan_test luci_plan) diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp new file mode 100644 index 0000000..d7ccf25 --- /dev/null +++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Plan/CircleNodeExecutionPlan.h" + +#include + +#include +#include + +TEST(CircleNodeExecutionPlan, basic_fields) +{ + luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7}); + + ASSERT_EQ(plan.order_in_plan(), 123); + ASSERT_THAT(plan.offsets(), testing::ElementsAre(4, 5, 6, 7)); + + plan.order_in_plan(321); + plan.offsets({1, 2, 3, 4}); + + ASSERT_EQ(plan.order_in_plan(), 321); + ASSERT_THAT(plan.offsets(), testing::ElementsAre(1, 2, 3, 4)); +} + +TEST(CircleNodeExecutionPlan, add_extract_plan) +{ + auto g = loco::make_graph(); + auto add = g->nodes()->create(); + + ASSERT_FALSE(luci::has_execution_plan(add)); + + luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7}); + luci::add_execution_plan(add, plan); + + ASSERT_TRUE(luci::has_execution_plan(add)); + + auto extracted_plan = luci::get_execution_plan(add); + + ASSERT_EQ(extracted_plan.order_in_plan(), 123); + ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(4, 5, 6, 7)); +} + +TEST(CircleNodeExecutionPlan, extract_plan_NEG) +{ + auto g = loco::make_graph(); + auto add = g->nodes()->create(); + + ASSERT_FALSE(luci::has_execution_plan(add)); + + ASSERT_ANY_THROW(luci::get_execution_plan(add)); +} + +TEST(CircleNodeExecutionPlan, double_set_plan_NEG) +{ + auto g = loco::make_graph(); + auto add = g->nodes()->create(); + + ASSERT_FALSE(luci::has_execution_plan(add)); + + luci::CircleNodeExecutionPlan plan1(123, {4, 5, 6, 7}); + luci::add_execution_plan(add, plan1); + ASSERT_TRUE(luci::has_execution_plan(add)); + + luci::CircleNodeExecutionPlan plan2(321, {1, 2, 3, 4}); + luci::add_execution_plan(add, plan2); + ASSERT_TRUE(luci::has_execution_plan(add)); + + auto extracted_plan = luci::get_execution_plan(add); + ASSERT_EQ(extracted_plan.order_in_plan(), 321); + ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(1, 2, 3, 4)); +} diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt index ae604ab..f8a0cc0 100644 --- 
a/compiler/luci/profile/CMakeLists.txt +++ b/compiler/luci/profile/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_profile ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_profile PRIVATE src) target_include_directories(luci_profile PUBLIC include) target_link_libraries(luci_profile PUBLIC loco) diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt index f48210b..0e6097f 100644 --- a/compiler/luci/service/CMakeLists.txt +++ b/compiler/luci/service/CMakeLists.txt @@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -if (NOT LIBRARY_TYPE) - set(LIBRARY_TYPE "SHARED") -endif(NOT LIBRARY_TYPE) +if (NOT LUCI_LIBRARY_TYPE) + set(LUCI_LIBRARY_TYPE "SHARED") +endif(NOT LUCI_LIBRARY_TYPE) -add_library(luci_service ${LIBRARY_TYPE} ${SOURCES}) +add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_service PRIVATE src) target_include_directories(luci_service PUBLIC include) target_link_libraries(luci_service PUBLIC luci_lang) diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build index 79f3230..90dfa77 100644 --- a/compiler/one-cmds/one-build +++ b/compiler/one-cmds/one-build @@ -38,6 +38,16 @@ def _get_parser(): _utils._add_default_arg(parser) + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = ['-' + s for s in opt_name_list] + if not opt_name_list: + opt_help_message = '(No available optimization options)' + else: + opt_help_message = '(Available optimization options: ' + ', '.join( + opt_name_list) + ')' + opt_help_message = 
'optimization name to use ' + opt_help_message + parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message) + return parser @@ -55,6 +65,14 @@ def _verify_arg(parser, args): # check if required arguments is given if not _utils._is_valid_attr(args, 'config'): parser.error('-C/--config argument is required') + # check if given optimization option exists + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list] + if _utils._is_valid_attr(args, 'O'): + if ' ' in getattr(args, 'O'): + parser.error('Not allowed to have space in the optimization name') + if not getattr(args, 'O') in opt_name_list: + parser.error('Invalid optimization option') def _get_driver_name(driver_name): @@ -101,6 +119,27 @@ def _verify_cfg(driver_list, config): raise AssertionError('Only one import-* driver can be executed') +# verify given optimization option file +def _verify_opt(args): + if _utils._is_valid_attr(args, 'O'): + config = configparser.ConfigParser() + config.optionxform = str + opt_name_path_dic = dict( + zip(_utils._get_optimization_list(get_name=True), + _utils._get_optimization_list())) + parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')]) + # check if given optimization option file exists + if not parsed: + raise FileNotFoundError('Not found given optimization configuration file') + # check if given optimization option file only has `one-optimize` section + if len(config.sections()) == 1 and config.sections()[0] == 'one-optimize': + pass + else: + raise AssertionError( + 'Optimization configuration file only allowed to have a \'one-optimize\' section' + ) + + def main(): # parse arguments # since the configuration file path is required first, @@ -121,6 +160,9 @@ def main(): ] _verify_cfg(drivers, config) + # verify optimization option file + _verify_opt(args) + # get sections to run section_to_run = [] for d in drivers: @@ -132,6 +174,8 @@ def main(): for section 
in section_to_run: driver_path = os.path.join(dir_path, _get_driver_name(section)) cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section] + if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'): + cmd += ['-O', getattr(args, 'O')] _utils._run(cmd) diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index 6ce973c..a64abff 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -64,6 +64,9 @@ def _get_parser(): # opt = (option_name, help_message) circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1]) + # optimization option from one-build + parser.add_argument('-O', type=str, help=argparse.SUPPRESS) + return parser @@ -113,6 +116,15 @@ def _optimize(args): _utils._run(circle2circle_cmd, err_prefix="circle2circle", logfile=f) +def _parse_opt(args): + if _utils._is_valid_attr(args, 'O'): + opt_name_path_dic = dict( + zip(_utils._get_optimization_list(get_name=True), + _utils._get_optimization_list())) + config_path = opt_name_path_dic['O' + getattr(args, 'O')] + _utils._parse_cfg_and_overwrite(config_path, 'one-optimize', args) + + def main(): # parse arguments parser = _get_parser() @@ -121,6 +133,11 @@ def main(): # parse configuration file _utils._parse_cfg(args, 'one-optimize') + # parse optimization file + # NOTE if there is a `one-optimize` section in above configuration file as well, + # it will be overwritten + _parse_opt(args) + # verify arguments _verify_arg(parser, args) diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize index cd623a6..22d4ddb 100644 --- a/compiler/one-cmds/one-quantize +++ b/compiler/one-cmds/one-quantize @@ -39,13 +39,13 @@ def _get_parser(): # input and output path. 
parser.add_argument( - '-i', '--input_path', type=str, help='full filepath of the input file') + '-i', '--input_path', type=str, help='full filepath of the input circle model') parser.add_argument( '-d', '--input_data', type=str, help= - 'full filepath of the input data file. if not specified, run with random input data.' + 'full filepath of the input data used for post-training quantization. if not specified, run with random input data.' ) parser.add_argument( '-f', @@ -55,7 +55,10 @@ def _get_parser(): 'file format of input data. h5/hdf5 (default), list/filelist (a text file where a file path of input data is written in each line), or dir/directory (a directory where input data are saved)' ) parser.add_argument( - '-o', '--output_path', type=str, help='full filepath of the output file') + '-o', + '--output_path', + type=str, + help='full filepath of the output quantized model') # argument for profiling parser.add_argument( @@ -70,41 +73,77 @@ def _get_parser(): quantization_group.add_argument( '--input_dtype', type=str, - help='input data type (supported: float32, default=float32)') + help= + 'input model data type (supported: float32, default=float32). Deprecated (Use input_model_dtype)' + ) + quantization_group.add_argument( + '--input_model_dtype', + type=str, + help='input model data type (supported: float32, default=float32)') quantization_group.add_argument( '--quantized_dtype', type=str, - help='output quantized data type (supported: uint8, int16, default=uint8)') + help='data type of output quantized model (supported: uint8, int16, default=uint8)' + ) quantization_group.add_argument( '--granularity', type=str, - help='quantize granularity (supported: layer, channel, default=layer)') + help='quantization granularity (supported: layer, channel, default=layer)') + quantization_group.add_argument( + '--input_type', + type=str, + help= + 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). 
QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.' + ) + quantization_group.add_argument( + '--output_type', + type=str, + help= + 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.' + ) quantization_group.add_argument( - '--min_percentile', type=str, help='minimum percentile (0.0~100.0, default=1.0)') + '--min_percentile', + type=str, + help= + 'minimum percentile (0.0~100.0, default=1.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.' + ) quantization_group.add_argument( - '--max_percentile', type=str, help='maximum percentile (0.0~100.0, default=99.0)') + '--max_percentile', + type=str, + help= + 'maximum percentile (0.0~100.0, default=99.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.' + ) quantization_group.add_argument( '--mode', type=str, - help='record mode (supported: percentile/moving_average, default=percentile)') + help= + "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max." + ) - # arguments for force_quantparam - parser.add_argument( + # arguments for force_quantparam option + force_quantparam_group = parser.add_argument_group( + 'arguments for force_quantparam option') + + force_quantparam_group.add_argument( '--force_quantparam', action='store_true', - help='write quantparam to the specified tensor') - parser.add_argument( + help= + 'overwrite quantparam (scale, zero_point) to the specified tensor in the quantized model.' 
+ ) + force_quantparam_group.add_argument( '--tensor_name', type=str, action='append', help='tensor name (string)') - parser.add_argument('--scale', type=float, action='append', help='scale (float)') - parser.add_argument( + force_quantparam_group.add_argument( + '--scale', type=float, action='append', help='scale (float)') + force_quantparam_group.add_argument( '--zero_point', type=int, action='append', help='zero point (int)') return parser def _set_default_values(args): - if not _utils._is_valid_attr(args, 'input_dtype'): - setattr(args, 'input_dtype', 'float32') + if not _utils._is_valid_attr(args, 'input_model_dtype') and not _utils._is_valid_attr( + args, 'input_dtype'): + setattr(args, 'input_model_dtype', 'float32') if not _utils._is_valid_attr(args, 'quantized_dtype'): setattr(args, 'quantized_dtype', 'uint8') if not _utils._is_valid_attr(args, 'granularity'): @@ -174,7 +213,10 @@ def _quantize(args): circle_quantizer_cmd.append('--verbose') # quantize_dequantize_weights circle_quantizer_cmd.append('--quantize_dequantize_weights') - if _utils._is_valid_attr(args, 'input_dtype'): + # Use input_model_dtype if it exists. Use input_dtype otherwise. + if _utils._is_valid_attr(args, 'input_model_dtype'): + circle_quantizer_cmd.append(getattr(args, 'input_model_dtype')) + elif _utils._is_valid_attr(args, 'input_dtype'): circle_quantizer_cmd.append(getattr(args, 'input_dtype')) if _utils._is_valid_attr(args, 'quantized_dtype'): circle_quantizer_cmd.append(getattr(args, 'quantized_dtype')) @@ -243,12 +285,21 @@ def _quantize(args): circle_quantizer_cmd.append('--verbose') # quantize_dequantize_weights circle_quantizer_cmd.append('--quantize_with_minmax') - if _utils._is_valid_attr(args, 'input_dtype'): + # Use input_model_dtype if it exists. Use input_dtype otherwise. 
+ if _utils._is_valid_attr(args, 'input_model_dtype'): + circle_quantizer_cmd.append(getattr(args, 'input_model_dtype')) + elif _utils._is_valid_attr(args, 'input_dtype'): circle_quantizer_cmd.append(getattr(args, 'input_dtype')) if _utils._is_valid_attr(args, 'quantized_dtype'): circle_quantizer_cmd.append(getattr(args, 'quantized_dtype')) if _utils._is_valid_attr(args, 'granularity'): circle_quantizer_cmd.append(getattr(args, 'granularity')) + if _utils._is_valid_attr(args, 'input_type'): + circle_quantizer_cmd.append('--input_type') + circle_quantizer_cmd.append(getattr(args, 'input_type')) + if _utils._is_valid_attr(args, 'output_type'): + circle_quantizer_cmd.append('--output_type') + circle_quantizer_cmd.append(getattr(args, 'output_type')) # input and output path circle_quantizer_cmd.append(tmp_output_path_2) if _utils._is_valid_attr(args, 'output_path'): diff --git a/compiler/one-cmds/tests/OONE-BUILD_014.cfg b/compiler/one-cmds/tests/OONE-BUILD_014.cfg new file mode 100644 index 0000000..a39aae0 --- /dev/null +++ b/compiler/one-cmds/tests/OONE-BUILD_014.cfg @@ -0,0 +1,2 @@ +[one-optimize] +make_batchnorm_gamma_positive=True diff --git a/compiler/one-cmds/tests/one-build_014.cfg b/compiler/one-cmds/tests/one-build_014.cfg new file mode 100644 index 0000000..f09145e --- /dev/null +++ b/compiler/one-cmds/tests/one-build_014.cfg @@ -0,0 +1,22 @@ +[one-build] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v1 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle +make_batchnorm_gamma_positive=False diff --git a/compiler/one-cmds/tests/one-build_014.test b/compiler/one-cmds/tests/one-build_014.test new file mode 100644 index 
0000000..b3acbf5 --- /dev/null +++ b/compiler/one-cmds/tests/one-build_014.test @@ -0,0 +1,77 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use `OONE-BUILD_014` optimization option + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +clean_envir() +{ + rm -rf ../optimization/OONE-BUILD_014.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi +} + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + clean_envir + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-build_014.cfg" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +if [ ! 
-d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + +cp OONE-BUILD_014.cfg ../optimization + +# run test +LUCI_LOG=5 one-build -C ${configfile} -OONE-BUILD_014 > ${filename}.log 2>&1 + +clean_envir + +if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-build_neg_007.test b/compiler/one-cmds/tests/one-build_neg_007.test new file mode 100644 index 0000000..5c5d9af --- /dev/null +++ b/compiler/one-cmds/tests/one-build_neg_007.test @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Valid optimization option but invalid configuration file path + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + rm -rf ../optimization/OONE_BUILD_NEG_007.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi + if grep -q "Not found given configuration file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +if [ ! 
-d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + + +touch ../optimization/OONE_BUILD_NEG_007.cfg + +configfile=".." + +# run test +one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-build_neg_008.test b/compiler/one-cmds/tests/one-build_neg_008.test new file mode 100644 index 0000000..8ed2871 --- /dev/null +++ b/compiler/one-cmds/tests/one-build_neg_008.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Invalid optimization option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Invalid optimization option" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile=".." + +# run test +one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-build_neg_009.test b/compiler/one-cmds/tests/one-build_neg_009.test new file mode 100644 index 0000000..8d9c831 --- /dev/null +++ b/compiler/one-cmds/tests/one-build_neg_009.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have space in the optimization name + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not allowed to have space in the optimization name" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile=".." + +# run test +one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-quantize_007.test b/compiler/one-cmds/tests/one-quantize_007.test new file mode 100644 index 0000000..34ae92d --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_007.test @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.random.quantized.q16.iq8.circle" + +rm -rf ${outputfile} + +# to create inception_v3.circle +if [[ ! -s ${inputfile} ]]; then + /bin/bash one-import_001.test > /dev/null 2>&1 + return_code=$? + if [[ ${return_code} != 0 ]]; then + trap_err_onexit + fi +fi + +# run test without input data +one-quantize \ +--input_dtype float32 \ +--quantized_dtype int16 \ +--granularity channel \ +--input_type uint8 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_008.test b/compiler/one-cmds/tests/one-quantize_008.test new file mode 100644 index 0000000..aff6bcf --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_008.test @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.random.quantized.q16.oq8.circle" + +rm -rf ${outputfile} + +# to create inception_v3.circle +if [[ ! -s ${inputfile} ]]; then + /bin/bash one-import_001.test > /dev/null 2>&1 + return_code=$? + if [[ ${return_code} != 0 ]]; then + trap_err_onexit + fi +fi + +# run test without input data +one-quantize \ +--input_dtype float32 \ +--quantized_dtype int16 \ +--granularity channel \ +--output_type uint8 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test new file mode 100644 index 0000000..ac920a4 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_neg_019.test @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with unsupported input_type + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Unsupported input type" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.quantized.neg_019.circle" + +rm -rf ${outputfile}.log + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype int16 \ +--granularity channel \ +--input_type float32 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_023.cfg b/compiler/one-cmds/tests/onecc_023.cfg new file mode 100644 index 0000000..edbcc6f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_023.cfg @@ -0,0 +1,15 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=True +one-pack=False +one-codegen=False + +[one-quantize] +input_path=inception_v3.circle +output_path=inception_v3.onecc_023.q16.iq8.circle +quantized_dtype=int16 +granularity=channel +input_type=uint8 diff --git a/compiler/one-cmds/tests/onecc_023.test b/compiler/one-cmds/tests/onecc_023.test new file mode 100644 index 0000000..50b3b1c --- /dev/null +++ b/compiler/one-cmds/tests/onecc_023.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# one-quantize only, with the input_type option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_023.cfg" +outputfile="inception_v3.onecc_023.q16.iq8.circle" + +rm -rf ${outputfile} + +# run test +onecc -C ${configfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py index efb01a2..5d84c2b 100644 --- a/compiler/one-cmds/utils.py +++ b/compiler/one-cmds/utils.py @@ -16,6 +16,8 @@ import argparse import configparser +import glob +import ntpath import os import subprocess import sys @@ -124,9 +126,34 @@ def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) +def _parse_cfg_and_overwrite(config_path, section, args): + """ + parse given section of configuration file and set the values of args. + Even if the values parsed from the configuration file already exist in args, + the values are overwritten. + """ + if config_path == None: + # DO NOTHING + return + config = configparser.ConfigParser() + # make option names case sensitive + config.optionxform = str + parsed = config.read(config_path) + if not parsed: + raise FileNotFoundError('Not found given configuration file') + if not config.has_section(section): + raise AssertionError('configuration file doesn\'t have \'' + section + + '\' section') + for key in config[section]: + setattr(args, key, config[section][key]) + # TODO support accumulated arguments + + def _parse_cfg(args, driver_name): """parse configuration file. If the option is directly given to the command line, - the option is processed prior to the configuration file. 
+ That is, if the values parsed from the configuration file already exist in args, + the values are ignored.""" if _is_valid_attr(args, 'config'): config = configparser.ConfigParser() config.optionxform = str @@ -290,3 +317,54 @@ def _run(cmd, err_prefix=None, logfile=None): logfile.write(line) if p.returncode != 0: sys.exit(p.returncode) + + +def _remove_prefix(str, prefix): + if str.startswith(prefix): + return str[len(prefix):] + return str + + +def _remove_suffix(str, suffix): + if str.endswith(suffix): + return str[:-len(suffix)] + return str + + +def _get_optimization_list(get_name=False): + """ + returns a list of optimization. If `get_name` is True, + only basename without extension is returned rather than full file path. + + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + Optimization options must be placed in `optimization` folder + """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + + # optimization folder + files = [f for f in glob.glob(dir_path + '/../optimization/O*.cfg', recursive=True)] + # exclude if the name has space + files = [s for s in files if not ' ' in s] + + opt_list = [] + for cand in files: + base = ntpath.basename(cand) + if os.path.isfile(cand) and os.access(cand, os.R_OK): + opt_list.append(cand) + + if get_name == True: + # NOTE the name includes prefix 'O' + # e.g. 
O1, O2, ONCHW not just 1, 2, NCHW + opt_list = [ntpath.basename(f) for f in opt_list] + opt_list = [_remove_suffix(s, '.cfg') for s in opt_list] + + return opt_list diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index 7028bd9..ada5ff5 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ b/compiler/tflchef/core/src/ModelChef.cpp @@ -207,7 +207,7 @@ struct CookParams std::string noname; }; -template void cook_graph(const T &graph, CookParams &cp) +template std::map cook_graph(const T &graph, CookParams &cp) { LOGGER(l); @@ -537,6 +537,8 @@ template void cook_graph(const T &graph, CookParams &cp) subgraph_builder.add_name(name); subgraph_vec.emplace_back(subgraph_builder.Finish()); + + return symbol_table; } } // namespace @@ -574,6 +576,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) // Operation-related std::vector> code_vec; + // SignatureDef-related + std::vector> signdef_vec; + // Graphs-related std::vector> subgraph_vec; @@ -617,13 +622,18 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) buffer_vec.emplace_back(buffer_builder.Finish()); } + // symbol_tables stores symbol_table of each sub graph + // this is used to find tensor ID(index) with tensor name + std::vector> symbol_tables; + // // Create Main graph // CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder, builtin_code_map, custom_code_vec, "main"}; - cook_graph<::tflchef::ModelRecipe>(model_recipe, cp); + auto table = cook_graph<::tflchef::ModelRecipe>(model_recipe, cp); + symbol_tables.push_back(table); // // Create subgraphs if exist @@ -638,11 +648,97 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder, builtin_code_map, custom_code_vec, stringStream.str()}; - cook_graph<::tflchef::Graph>(graph, cp); + auto table = cook_graph<::tflchef::Graph>(graph, cp); + symbol_tables.push_back(table); + } + + // Create 
Signature-Def + // + for (int s = 0; s < model_recipe.signature_def_size(); ++s) + { + // load from recipe + const auto &rec_signature_def = model_recipe.signature_def(s); + + std::vector> tensormap_inputs; + std::vector> tensormap_outputs; + + // which subgraph index to cook + auto subgraph_index = 0; + if (rec_signature_def.has_subgraph_index()) + { + subgraph_index = rec_signature_def.subgraph_index(); + } + assert(subgraph_index < symbol_tables.size()); + auto &symbol_table = symbol_tables[subgraph_index]; + + // cook for inputs + for (int si = 0; si < rec_signature_def.inputs_size(); ++si) + { + // recipe for input TensorMap + auto rec_tm_input = rec_signature_def.inputs(si); + auto name = flatbuffer_builder->CreateString(rec_tm_input.name()); + uint32_t tensor_index = 0; + // either tensor or tensor_index should exist + assert(rec_tm_input.has_tensor() || rec_tm_input.has_tensor_index()); + if (rec_tm_input.has_tensor()) + { + // we can get tensor_index from symbol_table + auto tensor = rec_tm_input.tensor(); + tensor_index = symbol_table[tensor]; + } + else + { + // or we can use tensor_index itself + tensor_index = rec_tm_input.tensor_index(); + } + + ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder}; + tensormap_builder.add_name(name); + tensormap_builder.add_tensor_index(tensor_index); + tensormap_inputs.push_back(tensormap_builder.Finish()); + } + // cook for outputs, same as inputs + for (int so = 0; so < rec_signature_def.outputs_size(); ++so) + { + auto rec_tm_output = rec_signature_def.outputs(so); + auto name = flatbuffer_builder->CreateString(rec_tm_output.name()); + uint32_t tensor_index = 0; + assert(rec_tm_output.has_tensor() || rec_tm_output.has_tensor_index()); + if (rec_tm_output.has_tensor()) + { + auto tensor = rec_tm_output.tensor(); + tensor_index = symbol_table[tensor]; + } + else + { + tensor_index = rec_tm_output.tensor_index(); + } + + ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder}; + 
tensormap_builder.add_name(name); + tensormap_builder.add_tensor_index(tensor_index); + tensormap_outputs.push_back(tensormap_builder.Finish()); + } + + auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs); + auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs); + auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name()); + auto key = flatbuffer_builder->CreateString(rec_signature_def.key()); + // TODO add validation for method_name and key + + ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder}; + signature_def_builder.add_inputs(inputs); + signature_def_builder.add_outputs(outputs); + signature_def_builder.add_method_name(method_name); + signature_def_builder.add_key(key); + signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index()); + + signdef_vec.emplace_back(signature_def_builder.Finish()); } // Create "Model" arguments auto buffers = flatbuffer_builder->CreateVector(buffer_vec); + auto signdefs = flatbuffer_builder->CreateVector(signdef_vec); auto operator_codes = flatbuffer_builder->CreateVector(code_vec); auto subgraphs = flatbuffer_builder->CreateVector(subgraph_vec); auto description = flatbuffer_builder->CreateString("Generated by tflchef"); @@ -652,6 +748,7 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) model_builder.add_version(3); model_builder.add_operator_codes(operator_codes); + model_builder.add_signature_defs(signdefs); model_builder.add_subgraphs(subgraphs); model_builder.add_description(description); model_builder.add_buffers(buffers); diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto index 34d50d9..4162cb1 100644 --- a/compiler/tflchef/proto/tflchef.proto +++ b/compiler/tflchef/proto/tflchef.proto @@ -647,6 +647,22 @@ message Operation { // use the number not listed in the above reserve list } +message TensorMap { + optional string name = 4; + // use tensor as name of the Operand or use 
tensor_index as order number. + // either one should exist. + optional string tensor = 5; + optional uint32 tensor_index = 6; +} + +message SignatureDef { + repeated TensorMap inputs = 4; + repeated TensorMap outputs = 5; + optional string method_name = 6; + optional string key = 10; + optional uint32 subgraph_index = 12; +} + // For additional subgraphs message Graph { repeated Operand operand = 1; @@ -664,4 +680,5 @@ message ModelRecipe { optional string name = 5; optional uint32 version = 6 [default = 1]; repeated Graph graph = 7; + repeated SignatureDef signature_def = 8; } diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe new file mode 100644 index 0000000..4481752 --- /dev/null +++ b/compiler/tflchef/tests/signature_def_index/test.recipe @@ -0,0 +1,60 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm1" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm2" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm3" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm1" +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm2" +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm3" +} +signature_def { + inputs: { + name: "ifm" + tensor_index: 0 + } + outputs { + name: "ofm2" + tensor_index: 2 + } + outputs { + name: "ofm3" + tensor_index: 3 + } + outputs { + name: "ofm1" + tensor_index: 1 + } + method_name: "serving_default" + key: "serv" + subgraph_index: 0 +} +input: "ifm" +output: "ofm3" +output: "ofm1" +output: "ofm2" diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe new file mode 100644 index 0000000..79be251 --- /dev/null +++ b/compiler/tflchef/tests/signature_def_name/test.recipe @@ -0,0 +1,60 @@ +operand { + name: 
"ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm1" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm2" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm3" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm1" +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm2" +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm3" +} +signature_def { + inputs: { + name: "ifm" + tensor_index: 0 + } + outputs { + name: "out2" + tensor: "ofm2" + } + outputs { + name: "out3" + tensor: "ofm3" + } + outputs { + name: "out1" + tensor: "ofm1" + } + method_name: "serving_default" + key: "serv" + subgraph_index: 0 +} +input: "ifm" +output: "ofm3" +output: "ofm1" +output: "ofm2" diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 7a480bc..2351e4c 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -405,7 +405,7 @@ void dump_model(std::ostream &os, const tflite::Model *model) for (uint32_t i = 0; i < signaturedefs->Length(); ++i) { auto sign_i = signaturedefs->Get(i); - os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key(" + os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key(" << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")" << std::endl; @@ -413,16 +413,18 @@ void dump_model(std::ostream &os, const tflite::Model *model) for (uint32_t t = 0; t < inputs_i->Length(); ++t) { auto inputs_i_t = inputs_i->Get(t); - os << " I T(" << t << ") " << inputs_i_t->name()->c_str() << ": " - << inputs_i_t->tensor_index() << std::endl; + os << " I(" << t << ")" + << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") " + << inputs_i_t->name()->c_str() << std::endl; } auto outputs_i = sign_i->outputs(); for (uint32_t t = 0; t < outputs_i->Length(); ++t) { auto 
outputs_i_t = outputs_i->Get(t); - os << " O T(" << t << ") " << outputs_i_t->name()->c_str() << ": " - << outputs_i_t->tensor_index() << std::endl; + os << " O(" << t << ")" + << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") " + << outputs_i_t->name()->c_str() << std::endl; } } os << std::endl; diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp index 4015631..fb8c211 100644 --- a/compiler/tflite2circle/driver/Driver.cpp +++ b/compiler/tflite2circle/driver/Driver.cpp @@ -80,7 +80,10 @@ int entry(int argc, char **argv) auto flatbuffer_builder = std::make_unique(1024); // convert tflite to circle - tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()}; + tflite2circle::CircleModel circle_model{flatbuffer_builder}; + + circle_model.load_offsets(tfl_model.get_model()); + circle_model.model_build(); std::ofstream outfile{circle_path, std::ios::binary}; diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h index 14c4f1c..189cfaf 100644 --- a/compiler/tflite2circle/include/CircleModel.h +++ b/compiler/tflite2circle/include/CircleModel.h @@ -60,14 +60,17 @@ template class Offset private: using TFLFlatBufVec = flatbuffers::Vector; using CIRFlatBufVecOffset = flatbuffers::Offset>; + using SignatureDefs = flatbuffers::Vector>; public: Offset(void) = delete; Offset(FlatBufBuilder &fb) : _fb{fb} {}; public: - // TODO use _fb - void build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec); + void set_signature_defs(const SignatureDefs *offset) { _tfl_signature_def_offsets = offset; } + +public: + void build(const TFLFlatBufVec *tflite_flatbuffer_vec); public: CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; } @@ -75,6 +78,8 @@ public: private: FlatBufBuilder &_fb; CIRFlatBufVecOffset _circle_flatbuffer_vec_offset; + // TODO revise this when Circle supports SignatureDef + const 
SignatureDefs *_tfl_signature_def_offsets = nullptr; }; class CircleModel @@ -84,9 +89,10 @@ private: public: CircleModel(void) = delete; - CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model); + CircleModel(FlatBufBuilder &fb); public: + void load_offsets(const tflite::Model *tfl_model); void model_build(void) const; const char *base(void) const; size_t size(void) const; diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index 4249f15..90cc415 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -24,19 +24,16 @@ namespace tflite2circle { -template <> -void Offset::build(FlatBufBuilder &fb, - const TFLFlatBufVec *tflite_flatbuffer_vec) +template <> void Offset::build(const TFLFlatBufVec *tflite_flatbuffer_vec) { if (tflite_flatbuffer_vec == nullptr) return; std::vector metadata_buffer_vec{tflite_flatbuffer_vec->begin(), tflite_flatbuffer_vec->end()}; - _circle_flatbuffer_vec_offset = fb->CreateVector(metadata_buffer_vec); + _circle_flatbuffer_vec_offset = _fb->CreateVector(metadata_buffer_vec); } -template <> -void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +template <> void Offset::build(const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector> buffers_vec; @@ -46,21 +43,22 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_f if (it->data()) { std::vector data_vec{it->data()->begin(), it->data()->end()}; - buffer_data = fb->CreateVector(data_vec); + buffer_data = _fb->CreateVector(data_vec); } - circle::BufferBuilder circle_buffer_builder{*fb}; + circle::BufferBuilder circle_buffer_builder{*_fb}; circle_buffer_builder.add_data(buffer_data); auto circle_buffers = circle_buffer_builder.Finish(); buffers_vec.emplace_back(circle_buffers); } - _circle_flatbuffer_vec_offset = fb->CreateVector(buffers_vec); + _circle_flatbuffer_vec_offset = _fb->CreateVector(buffers_vec); } -template <> -void 
Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +template <> void Offset::build(const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector> subgprahs_vec; + int32_t subgraph_index = 0; + for (auto it_sg : *tflite_flatbuffer_vec) { // tensors of subgraph @@ -74,12 +72,12 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite if (it->shape()) { auto shape_vec = std::vector({it->shape()->begin(), it->shape()->end()}); - shape = fb->CreateVector(shape_vec); + shape = _fb->CreateVector(shape_vec); } // name flatbuffers::Offset name; if (it->name()) - name = fb->CreateString(it->name()->str()); + name = _fb->CreateString(it->name()->str()); // quantization flatbuffers::Offset quantization; if (it->quantization()) @@ -100,8 +98,8 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite auto rmax = it->quantization()->max(); tfmin = std::vector{rmin->begin(), rmin->end()}; tfmax = std::vector{rmax->begin(), rmax->end()}; - min = fb->CreateVector(tfmin); - max = fb->CreateVector(tfmax); + min = _fb->CreateVector(tfmin); + max = _fb->CreateVector(tfmax); } if (it->quantization()->scale() && it->quantization()->zero_point()) @@ -110,11 +108,11 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite auto rz = it->quantization()->zero_point(); tfscale = std::vector{rs->begin(), rs->end()}; tfzerop = std::vector{rz->begin(), rz->end()}; - scale = fb->CreateVector(tfscale); - zero_point = fb->CreateVector(tfzerop); + scale = _fb->CreateVector(tfscale); + zero_point = _fb->CreateVector(tfzerop); } - quantization = circle::CreateQuantizationParameters(*fb, min, max, scale, zero_point, + quantization = circle::CreateQuantizationParameters(*_fb, min, max, scale, zero_point, circle::QuantizationDetails_NONE, 0, quantized_dimension); } @@ -135,7 +133,7 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { auto traversal_order_vec = std::vector{ it->sparsity()->traversal_order()->begin(), 
it->sparsity()->traversal_order()->end()}; - traversal_order = fb->CreateVector(traversal_order_vec); + traversal_order = _fb->CreateVector(traversal_order_vec); } // block_map @@ -143,7 +141,7 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { auto block_map_vec = std::vector{it->sparsity()->block_map()->begin(), it->sparsity()->block_map()->end()}; - block_map = fb->CreateVector(block_map_vec); + block_map = _fb->CreateVector(block_map_vec); } // dim_metadata @@ -154,18 +152,18 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite // array_segments auto tflite_array_segments_type = it->array_segments_type(); auto circle_array_segments = - get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type); + get_circle_sparse_index_vector(*_fb, it->array_segments(), tflite_array_segments_type); auto circle_array_segments_type = get_circle_sparse_index_vector_type(tflite_array_segments_type); // array_indices auto tflite_array_indices_type = it->array_indices_type(); auto circle_array_indices = - get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type); + get_circle_sparse_index_vector(*_fb, it->array_indices(), tflite_array_indices_type); auto circle_array_indices_type = get_circle_sparse_index_vector_type(tflite_array_indices_type); - auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb}; + auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*_fb}; circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format())); circle_dim_metadata_builder.add_dense_size(it->dense_size()); @@ -176,9 +174,9 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite auto dim_metadata = circle_dim_metadata_builder.Finish(); dim_metadata_vec.emplace_back(dim_metadata); } - dim_metadata = fb->CreateVector(dim_metadata_vec); + dim_metadata = _fb->CreateVector(dim_metadata_vec); - sparsity = circle::CreateSparsityParameters(*fb, 
traversal_order, block_map, dim_metadata); + sparsity = circle::CreateSparsityParameters(*_fb, traversal_order, block_map, dim_metadata); } // shape signature @@ -187,10 +185,10 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { auto shape_signature_vec = std::vector({it->shape_signature()->begin(), it->shape_signature()->end()}); - shape_signature = fb->CreateVector(shape_signature_vec); + shape_signature = _fb->CreateVector(shape_signature_vec); } - circle::TensorBuilder tensor_builder{*fb}; + circle::TensorBuilder tensor_builder{*_fb}; tensor_builder.add_shape(shape); tensor_builder.add_type(get_circle_tensortype(it->type())); tensor_builder.add_buffer(it->buffer()); @@ -202,19 +200,56 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite auto tensor = tensor_builder.Finish(); tensor_vec.emplace_back(tensor); } - auto circle_tensors = fb->CreateVector(tensor_vec); + auto circle_tensors = _fb->CreateVector(tensor_vec); // inputs of subgraph auto tflite_inputs = it_sg->inputs(); std::vector input_vec{tflite_inputs->begin(), tflite_inputs->end()}; - auto circle_inputs = fb->CreateVector(input_vec); + // apply signature_def to input tensor index so that input orders are correct + // NOTE we do not need this when circle format supports signature_def + if (_tfl_signature_def_offsets != nullptr) + { + for (auto it_signdef : *_tfl_signature_def_offsets) + { + if (it_signdef->subgraph_index() == subgraph_index) + { + auto inputs = it_signdef->inputs(); + assert(inputs->size() == input_vec.size()); + uint32_t input_vec_idx = 0; + for (auto it_tm : *inputs) + { + input_vec[input_vec_idx++] = static_cast(it_tm->tensor_index()); + } + } + } + } + + auto circle_inputs = _fb->CreateVector(input_vec); // outputs of subgraph auto tflite_outputs = it_sg->outputs(); std::vector output_vec{tflite_outputs->begin(), tflite_outputs->end()}; - auto circle_outputs = fb->CreateVector(output_vec); + if (_tfl_signature_def_offsets != nullptr) + { + // 
apply SignatureDef + for (auto it_signdef : *_tfl_signature_def_offsets) + { + if (it_signdef->subgraph_index() == subgraph_index) + { + auto outputs = it_signdef->outputs(); + assert(outputs->size() == output_vec.size()); + uint32_t output_vec_idx = 0; + for (auto it_tm : *outputs) + { + output_vec[output_vec_idx++] = static_cast(it_tm->tensor_index()); + } + } + } + } + + auto circle_outputs = _fb->CreateVector(output_vec); // operators of subgraph std::vector> operator_vec; @@ -226,12 +261,12 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { // inputs std::vector input_vec{it->inputs()->begin(), it->inputs()->end()}; - auto circle_inputs = fb->CreateVector(input_vec); + auto circle_inputs = _fb->CreateVector(input_vec); // outputs std::vector output_vec{it->outputs()->begin(), it->outputs()->end()}; - auto circle_outputs = fb->CreateVector(output_vec); + auto circle_outputs = _fb->CreateVector(output_vec); // builtin options - auto circle_builtin_options = get_circle_builtin_options(*fb, it); + auto circle_builtin_options = get_circle_builtin_options(*_fb, it); auto circle_builtin_options_type = get_circle_builtin_options_type(it); // custom options flatbuffers::Offset> circle_custom_options; @@ -239,14 +274,14 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { std::vector custom_options_vec{it->custom_options()->begin(), it->custom_options()->end()}; - circle_custom_options = fb->CreateVector(custom_options_vec); + circle_custom_options = _fb->CreateVector(custom_options_vec); } // custom options format // TODO Make get_circle_custom_options_format assert(it->custom_options_format() == tflite::CustomOptionsFormat_FLEXBUFFERS); auto circle_custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS; - circle::OperatorBuilder operator_builder{*fb}; + circle::OperatorBuilder operator_builder{*_fb}; operator_builder.add_opcode_index(it->opcode_index()); operator_builder.add_inputs(circle_inputs); 
operator_builder.add_outputs(circle_outputs); @@ -259,13 +294,13 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite operator_vec.emplace_back(opeartor); } } - auto circle_operators = fb->CreateVector(operator_vec); + auto circle_operators = _fb->CreateVector(operator_vec); // name of subgraph - auto subgraphs_name = fb->CreateString(it_sg->name()); + auto subgraphs_name = _fb->CreateString(it_sg->name()); // subgraphs - auto circle_subgraph_builder = circle::SubGraphBuilder{*fb}; + auto circle_subgraph_builder = circle::SubGraphBuilder{*_fb}; circle_subgraph_builder.add_tensors(circle_tensors); circle_subgraph_builder.add_inputs(circle_inputs); @@ -276,8 +311,11 @@ void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite auto circle_subgraph = circle_subgraph_builder.Finish(); subgprahs_vec.emplace_back(circle_subgraph); + + // next subgraph + subgraph_index = subgraph_index + 1; } - _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec); + _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec); } tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) @@ -291,15 +329,14 @@ tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) return opcode->builtin_code(); } -template <> -void Offset::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +template <> void Offset::build(const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector> operator_code_vec; for (auto it : *tflite_flatbuffer_vec) { - auto custom_code = fb->CreateString(it->custom_code()); - circle::OperatorCodeBuilder operator_code_builder{*fb}; + auto custom_code = _fb->CreateString(it->custom_code()); + circle::OperatorCodeBuilder operator_code_builder{*_fb}; // TODO support circle deprecated_builtin_code auto bt_code = builtin_code_neutral(it); operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code)); @@ -308,23 +345,28 @@ void Offset::build(FlatBufBuilder &fb, const 
TFLFlatBufVec *tf auto code = operator_code_builder.Finish(); operator_code_vec.emplace_back(code); } - _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec); + _circle_flatbuffer_vec_offset = _fb->CreateVector(operator_code_vec); } -CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model) +CircleModel::CircleModel(FlatBufBuilder &fb) : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb} { - _operator_codes_offset = std::make_unique>(fb); - _subGraphs_offset = std::make_unique>(fb); - _buffers_offset = std::make_unique>(fb); - _metadata_buffer_offset = std::make_unique>(fb); + // NOTHING TODO +} + +void CircleModel::load_offsets(const tflite::Model *tfl_model) +{ + _operator_codes_offset = std::make_unique>(_fb); + _subGraphs_offset = std::make_unique>(_fb); + _buffers_offset = std::make_unique>(_fb); + _metadata_buffer_offset = std::make_unique>(_fb); - _operator_codes_offset->build(fb, tfl_model->operator_codes()); - _subGraphs_offset->build(fb, tfl_model->subgraphs()); - _buffers_offset->build(fb, tfl_model->buffers()); - _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer()); + _subGraphs_offset->set_signature_defs(tfl_model->signature_defs()); - model_build(); + _operator_codes_offset->build(tfl_model->operator_codes()); + _subGraphs_offset->build(tfl_model->subgraphs()); + _buffers_offset->build(tfl_model->buffers()); + _metadata_buffer_offset->build(tfl_model->metadata_buffer()); } void CircleModel::model_build(void) const diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 50ee052..2241c9e 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000000120001) + set(VCONONE_VERSION 0x0000000000130001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option diff 
--git a/docs/conf.py b/docs/conf.py index b59cab8..ff4070f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors' author = 'Samsung Research & contributors' # The full version, including alpha/beta/rc tags -release = '1.18.0' +release = '1.19.0' # -- General configuration --------------------------------------------------- diff --git a/docs/release/1.19/index.rst b/docs/release/1.19/index.rst new file mode 100644 index 0000000..c80782c --- /dev/null +++ b/docs/release/1.19/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Wed Nov 10 15:21:13 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.19 +==== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.19.0.md diff --git a/docs/release/1.19/release-note-1.19.0.md b/docs/release/1.19/release-note-1.19.0.md new file mode 100644 index 0000000..e63d870 --- /dev/null +++ b/docs/release/1.19/release-note-1.19.0.md @@ -0,0 +1,8 @@ +# Release Note 1.19.0 + +## ONE Compiler + +### Compiler Frontend + +- `circle-quantizer` supports input/output type option +- Introduce configuration file for optimization options diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake index b48239f..99118c5 100644 --- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake +++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake @@ -3,8 +3,10 @@ function(_CMSISSource_import) nnas_include(OptionTools) envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz) + set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad) - ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}) + ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL} + CHECKSUM 
"SHA256=${CMSIS_5_8_0_SHA256}") set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE) set(CMSISSource_FOUND TRUE PARENT_SCOPE) diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake index 0eb8eb9..8b0a602 100644 --- a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake +++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake @@ -27,8 +27,9 @@ function(_FlatBuffers_build) BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10 BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} - IDENTIFIER "1.10-fix4" - EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON" + IDENTIFIER "1.10-fix6" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF" + "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON" PKG_NAME "FLATBUFFERS-1.10") endfunction(_FlatBuffers_build) diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake index daa749c..06366db 100644 --- a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake +++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake @@ -27,8 +27,9 @@ function(_FlatBuffers_build) BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12 BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} - IDENTIFIER "1.12-fix1" - EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON" + IDENTIFIER "1.12-fix3" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF" + "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON" PKG_NAME "FLATBUFFERS-1.12") endfunction(_FlatBuffers_build) diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake index e551e29..8b0a602 100644 --- a/infra/cmake/packages/FlatBuffersConfig.cmake +++ b/infra/cmake/packages/FlatBuffersConfig.cmake @@ -27,8 +27,9 @@ function(_FlatBuffers_build) 
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10 BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} - IDENTIFIER "1.10-fix4" - EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON" + IDENTIFIER "1.10-fix6" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF" + "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON" PKG_NAME "FLATBUFFERS-1.10") endfunction(_FlatBuffers_build) diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake index 8b17430..8055545 100644 --- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake +++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake @@ -3,8 +3,10 @@ function(_MbedOSSource_import) nnas_include(OptionTools) envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz) + set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e) - ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}) + ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL} + CHECKSUM "SHA256=${MBEDOS_6_15_SHA256}") set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE) set(MbedOSSource_FOUND TRUE PARENT_SCOPE) diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog index 12af5f9..2763ac5 100644 --- a/infra/debian/compiler/changelog +++ b/infra/debian/compiler/changelog @@ -1,3 +1,10 @@ +one (1.19.0) bionic; urgency=medium + + * `circle-quantizer` supports input/output type option + * Introduce configuration file for optimization options + + -- seongwoo Wed, 10 Nov 2021 15:53:39 +0900 + one (1.18.0) bionic; urgency=medium * More optimization pass diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog index ee0d3e6..4cf0abc 100644 --- a/infra/debian/runtime/changelog +++ b/infra/debian/runtime/changelog @@ -1,3 +1,9 @@ +one (1.19.0) bionic; 
urgency=low + + * Synch up version with ONE Compiler + + -- Chunseok Lee Wed, 10 Nov 2021 14:23:00 +0900 + one (1.18.0) bionic; urgency=low * Synch up version with ONE Compiler diff --git a/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake new file mode 100644 index 0000000..544be03 --- /dev/null +++ b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake @@ -0,0 +1,66 @@ +set(CMAKE_SYSTEM_NAME Generic) + +set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +set(CMAKE_C_COMPILER "${C_COMPILER}") +set(CMAKE_CXX_COMPILER "${CXX_COMPILER}") +set(CMAKE_ASM_COMPILER "${ASM_COMPILER}") +set(CMAKE_OBJCOPY "${OBJCOPY}") + +set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU") + +# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into +# - CMAKE_SYSTEM_PROCESSOR=cortex-m33 +# - TARGET_CPU_FEATURES=no-fp;no-dsp +string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU}) +list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR) +string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR) + +set(CMAKE_EXECUTABLE_SUFFIX ".elf") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +# Select C/C++ version +set(CMAKE_C_STANDARD 99) +set(CMAKE_CXX_STANDARD 14) + +# Compile options +add_compile_options( + -mcpu=${TARGET_CPU} + -mthumb + "$<$:-gdwarf-3>" + "$<$:-funwind-tables;-frtti;-fexceptions>") + +# Compile definescd +add_compile_definitions( + "$<$>:NDEBUG>") + +# Link options +add_link_options( + -mcpu=${TARGET_CPU} + -mthumb + --specs=nosys.specs) + +# Set floating point unit +if("${TARGET_CPU}" MATCHES "\\+fp") + set(FLOAT hard) +elseif("${TARGET_CPU}" MATCHES "\\+nofp") + set(FLOAT soft) +elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR + "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55") + set(FLOAT hard) +else() + 
set(FLOAT soft) +endif() + +if (FLOAT) + add_compile_options(-mfloat-abi=${FLOAT}) + add_link_options(-mfloat-abi=${FLOAT}) +endif() + +# Compilation warnings +add_compile_options( + -Wno-all +) diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake index 4761e84..6ae7dea 100644 --- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake +++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake @@ -125,6 +125,13 @@ function(_ARMCompute_Build ARMComputeInstall_DIR) list(APPEND SCONS_OPTIONS "Werror=0") list(APPEND SCONS_OPTIONS "os=${TARGET_OS}") + #### Disable test build + list(APPEND SCONS_OPTIONS "benchmark_tests=0") + list(APPEND SCONS_OPTIONS "validation_tests=0") + list(APPEND SCONS_OPTIONS "benchmark_examples=0") + list(APPEND SCONS_OPTIONS "validate_examples=0") + list(APPEND SCONS_OPTIONS "reference_openmp=0") + if(DEFINED EXTERNALS_BUILD_THREADS) set(N ${EXTERNALS_BUILD_THREADS}) else(DEFINED EXTERNALS_BUILD_THREADS) diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 4133d7a..547d46a 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -1,6 +1,6 @@ Name: nnfw Summary: nnfw -Version: 1.18.0 +Version: 1.19.0 Release: 1 Group: Development License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0 diff --git a/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe new file mode 100644 index 0000000..2cd7b90 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe @@ -0,0 +1,34 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 dim: 2 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 1 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + } + input: "ifm" + input: "ker" + input: "" + output: "ofm" 
+} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe new file mode 100644 index 0000000..ead0c33 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe @@ -0,0 +1,42 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 2 dim: 4 } +} +operand { + name: "Tanh" + type: FLOAT32 + shape { dim: 2 dim: 4 } +} +operand { + name: "weight" + type: FLOAT32 + shape { dim: 4 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 2 dim: 4 } +} +operation { + type: "Tanh" + input: "in" + output: "Tanh" +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + } + input: "Tanh" + input: "weight" + input: "" + output: "out" +} +input: "in" +output: "out" diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe new file mode 100644 index 0000000..ae993e6 --- /dev/null +++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe @@ -0,0 +1,82 @@ +operand { + name: "ifm1" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ifm2" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm1" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm2" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm3" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "Add" + input: "ifm1" + input: "ifm2" + output: "ofm1" + add_options { + activation: NONE + } +} +operation { + type: "Mul" + input: "ifm1" + input: "ifm2" + output: "ofm2" + mul_options { + activation: 0 + } +} +operation { + type: "Sub" + input: "ifm1" + input: "ifm2" + output: "ofm3" + sub_options { + activation: 0 + } +} +signature_def { + inputs: { + name: "ifm1" + 
tensor_index: 0 + } + inputs: { + name: "ifm2" + tensor_index: 1 + } + outputs { + name: "ofm2" + tensor_index: 3 + } + outputs { + name: "ofm3" + tensor_index: 4 + } + outputs { + name: "ofm1" + tensor_index: 2 + } + method_name: "serving_default" + key: "serv" + subgraph_index: 0 +} +input: "ifm1" +input: "ifm2" +output: "ofm3" +output: "ofm1" +output: "ofm2" diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle index b432929..9b8840f 100644 --- a/runtime/contrib/android/api/build.gradle +++ b/runtime/contrib/android/api/build.gradle @@ -8,7 +8,7 @@ android { minSdkVersion 26 targetSdkVersion 29 versionCode 1 - versionName "1.18.0" + versionName "1.19.0" externalNativeBuild { ndkBuild { diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 4fce291..b885a6b 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01001200 +#define NNFW_VERSION 0x01001300 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc index a7185ca..514c014 100644 --- a/runtime/onert/test/core/compiler/HEScheduler.cc +++ b/runtime/onert/test/core/compiler/HEScheduler.cc @@ -351,14 +351,19 @@ protected: std::string _original_profiling_mode; }; +// +// HEScheduler tests +// + class HESchedulerTestWithExecutorParam : public HESchedulerTest, public testing::WithParamInterface { }; -// -// HEScheduler tests -// +// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - +// one time for each executor +INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, + testing::Values(LINEAR, DATAFLOW, PARALLEL)); // Test scheduler behavior for straight graph with 
known execution time of all nodes and permutes. TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) @@ -490,11 +495,6 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) } } -// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - -// one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, - testing::Values(LINEAR, DATAFLOW, PARALLEL)); - // Test scheduler behavior for branched graph and enabled profiling mode TEST_F(HESchedulerTest, branched_graph_profiling_mode) { -- 2.7.4