From ac6e4dd7b480e83b586ef533d7b29a8a97eb48fe Mon Sep 17 00:00:00 2001
From: Chunseok Lee <chunseok.lee@samsung.com>
Date: Mon, 15 Nov 2021 10:54:00 +0900
Subject: [PATCH 1/1] Imported Upstream version 1.19.0

---
 .ahub/tcchecker-tca/config.yaml                    |   2 -
 .gitattributes                                     |   1 +
 .github/workflows/check-format.yml                 |  64 ++++
 .github/workflows/check-pr-commit.yml              |  51 +++
 compiler/circle-execution-plan/CMakeLists.txt      |  16 +
 compiler/circle-execution-plan/README.md           |  29 ++
 compiler/circle-execution-plan/requires.cmake      |   4 +
 .../src/CircleExecutionPlan.cpp                    |  99 ++++++
 .../circle-execution-plan/src/ExecutionPlanner.cpp | 389 +++++++++++++++++++++
 .../circle-execution-plan/src/ExecutionPlanner.h   | 130 +++++++
 compiler/circle-quantizer/src/CircleQuantizer.cpp  |  44 ++-
 .../pal/cmsisnn/KernelsToBuild.lst                 |  56 +++
 compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h  |  33 ++
 .../pal/cmsisnn/PALBatchToSpaceND.h                |  37 ++
 compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h  |  70 ++++
 .../luci-interpreter/pal/cmsisnn/PALDepthToSpace.h |  35 ++
 compiler/luci-interpreter/pal/cmsisnn/PALElu.h     |  33 ++
 .../luci-interpreter/pal/cmsisnn/PALL2Normalize.h  |  34 ++
 .../luci-interpreter/pal/cmsisnn/PALL2Pool2D.h     |  33 ++
 .../luci-interpreter/pal/cmsisnn/PALLeakyRelu.h    |  32 ++
 compiler/luci-interpreter/pal/cmsisnn/PALMul.h     |  45 +++
 compiler/luci-interpreter/pal/cmsisnn/PALNeg.h     |  32 ++
 .../pal/cmsisnn/PALResizeBilinear.h                |  37 ++
 .../pal/cmsisnn/PALResizeNearestNeighbor.h         |  37 ++
 compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h |  78 +++++
 .../pal/cmsisnn/PALSpaceToBatchND.h                |  38 ++
 .../luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h |  35 ++
 compiler/luci-interpreter/pal/cmsisnn/PALSub.h     |  35 ++
 compiler/luci-interpreter/pal/cmsisnn/pal.cmake    |  62 ++++
 compiler/luci-interpreter/src/kernels/Conv2D.cpp   |  11 +
 .../luci-interpreter/src/kernels/Conv2D.test.cpp   |  39 +++
 compiler/luci-interpreter/src/kernels/Utils.cpp    |   1 -
 .../luci-interpreter/src/loader/CMakeLists.txt     |   2 +-
 .../luci-interpreter/src/loader/GraphLoader.cpp    |  62 +++-
 .../luci-interpreter/src/loader/nodes/Conv2D.cpp   |  15 +-
 compiler/luci-micro/CMakeLists.txt                 |   5 +-
 compiler/luci-micro/standalone/Toolchain.cmake     |   8 -
 compiler/luci/CMakeLists.txt                       |   4 +-
 compiler/luci/env/CMakeLists.txt                   |   8 +-
 compiler/luci/export/CMakeLists.txt                |  37 +-
 compiler/luci/export/src/CircleExporter.test.cpp   | 137 ++++++++
 compiler/luci/import/CMakeLists.txt                |  11 +-
 .../luci/import/include/luci/Import/CircleReader.h |  71 +++-
 compiler/luci/import/src/CircleReader.cpp          | 184 +++++++++-
 compiler/luci/import/src/CircleReader.test.cpp     |  67 ++++
 compiler/luci/import/src/Importer.cpp              |   1 +
 compiler/luci/import/src/Importer.test.cpp         | 285 ++++++++++++++-
 compiler/luci/lang/CMakeLists.txt                  |   8 +-
 compiler/luci/log/CMakeLists.txt                   |   8 +-
 compiler/luci/log/include/luci/Log.h               |   5 +-
 compiler/luci/log/src/Log.cpp                      |  20 +-
 compiler/luci/logex/CMakeLists.txt                 |   8 +-
 compiler/luci/partition/CMakeLists.txt             |   8 +-
 compiler/luci/partition/src/PartitionMerge.cpp     |   3 +
 compiler/luci/partition/src/PartitionPGroups.cpp   | 115 ++++++
 compiler/luci/pass/CMakeLists.txt                  |  10 +-
 compiler/luci/pass/include/luci/CircleOptimizer.h  |   5 +-
 .../include/luci/Pass/QuantizeWithMinMaxPass.h     |  22 +-
 compiler/luci/pass/src/CircleOptimizer.cpp         |  22 +-
 .../luci/pass/src/FuseActivationFunctionPass.cpp   |  10 +-
 .../pass/src/FuseActivationFunctionPass.test.cpp   |  77 ++++
 compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp  | 157 ++++++++-
 compiler/luci/plan/CMakeLists.txt                  |  19 +-
 .../luci/plan/src/CircleNodeExecutionPlan.test.cpp |  84 +++++
 compiler/luci/profile/CMakeLists.txt               |   8 +-
 compiler/luci/service/CMakeLists.txt               |   8 +-
 compiler/one-cmds/one-build                        |  44 +++
 compiler/one-cmds/one-optimize                     |  17 +
 compiler/one-cmds/one-quantize                     |  89 ++++-
 compiler/one-cmds/tests/OONE-BUILD_014.cfg         |   2 +
 compiler/one-cmds/tests/one-build_014.cfg          |  22 ++
 compiler/one-cmds/tests/one-build_014.test         |  77 ++++
 compiler/one-cmds/tests/one-build_neg_007.test     |  69 ++++
 compiler/one-cmds/tests/one-build_neg_008.test     |  41 +++
 compiler/one-cmds/tests/one-build_neg_009.test     |  41 +++
 compiler/one-cmds/tests/one-quantize_007.test      |  55 +++
 compiler/one-cmds/tests/one-quantize_008.test      |  55 +++
 compiler/one-cmds/tests/one-quantize_neg_019.test  |  50 +++
 compiler/one-cmds/tests/onecc_023.cfg              |  15 +
 compiler/one-cmds/tests/onecc_023.test             |  42 +++
 compiler/one-cmds/utils.py                         |  80 ++++-
 compiler/tflchef/core/src/ModelChef.cpp            | 103 +++++-
 compiler/tflchef/proto/tflchef.proto               |  17 +
 .../tflchef/tests/signature_def_index/test.recipe  |  60 ++++
 .../tflchef/tests/signature_def_name/test.recipe   |  60 ++++
 compiler/tfldump/src/Dump.cpp                      |  12 +-
 compiler/tflite2circle/driver/Driver.cpp           |   5 +-
 compiler/tflite2circle/include/CircleModel.h       |  12 +-
 compiler/tflite2circle/src/CircleModel.cpp         | 150 +++++---
 compiler/vconone/CMakeLists.txt                    |   2 +-
 docs/conf.py                                       |   2 +-
 docs/release/1.19/index.rst                        |  13 +
 docs/release/1.19/release-note-1.19.0.md           |   8 +
 .../CMSISSource-5.8.0/CMSISSourceConfig.cmake      |   4 +-
 .../FlatBuffers-1.10/FlatBuffersConfig.cmake       |   5 +-
 .../FlatBuffers-1.12/FlatBuffersConfig.cmake       |   5 +-
 infra/cmake/packages/FlatBuffersConfig.cmake       |   5 +-
 .../MbedOSSource-6.15/MbedOSSourceConfig.cmake     |   4 +-
 infra/debian/compiler/changelog                    |   7 +
 infra/debian/runtime/changelog                     |   6 +
 .../cmake/buildtool/config/arm-none-eabi-gcc.cmake |  66 ++++
 infra/nnfw/cmake/packages/ARMComputeConfig.cmake   |   7 +
 packaging/nnfw.spec                                |   2 +-
 res/TensorFlowLiteRecipes/Conv2D_005/test.recipe   |  34 ++
 .../Part_Tanh_FC_nobias/test.recipe                |  42 +++
 .../SignatureDef_MultiOut_000/test.recipe          |  82 +++++
 runtime/contrib/android/api/build.gradle           |   2 +-
 runtime/onert/api/include/nnfw_version.h           |   2 +-
 runtime/onert/test/core/compiler/HEScheduler.cc    |  16 +-
 109 files changed, 4330 insertions(+), 238 deletions(-)
 create mode 100644 .github/workflows/check-format.yml
 create mode 100644 .github/workflows/check-pr-commit.yml
 create mode 100644 compiler/circle-execution-plan/CMakeLists.txt
 create mode 100644 compiler/circle-execution-plan/README.md
 create mode 100644 compiler/circle-execution-plan/requires.cmake
 create mode 100644 compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
 create mode 100644 compiler/circle-execution-plan/src/ExecutionPlanner.cpp
 create mode 100644 compiler/circle-execution-plan/src/ExecutionPlanner.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALElu.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALMul.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/PALSub.h
 create mode 100644 compiler/luci-interpreter/pal/cmsisnn/pal.cmake
 delete mode 100644 compiler/luci-micro/standalone/Toolchain.cmake
 create mode 100644 compiler/luci/export/src/CircleExporter.test.cpp
 create mode 100644 compiler/luci/import/src/CircleReader.test.cpp
 create mode 100644 compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
 create mode 100644 compiler/one-cmds/tests/OONE-BUILD_014.cfg
 create mode 100644 compiler/one-cmds/tests/one-build_014.cfg
 create mode 100644 compiler/one-cmds/tests/one-build_014.test
 create mode 100644 compiler/one-cmds/tests/one-build_neg_007.test
 create mode 100644 compiler/one-cmds/tests/one-build_neg_008.test
 create mode 100644 compiler/one-cmds/tests/one-build_neg_009.test
 create mode 100644 compiler/one-cmds/tests/one-quantize_007.test
 create mode 100644 compiler/one-cmds/tests/one-quantize_008.test
 create mode 100644 compiler/one-cmds/tests/one-quantize_neg_019.test
 create mode 100644 compiler/one-cmds/tests/onecc_023.cfg
 create mode 100644 compiler/one-cmds/tests/onecc_023.test
 create mode 100644 compiler/tflchef/tests/signature_def_index/test.recipe
 create mode 100644 compiler/tflchef/tests/signature_def_name/test.recipe
 create mode 100644 docs/release/1.19/index.rst
 create mode 100644 docs/release/1.19/release-note-1.19.0.md
 create mode 100644 infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
 create mode 100644 res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe

diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 86d272d..40635d4 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -16,9 +16,7 @@ test:
       - /runtime/onert/test/graph/verifier
       - /runtime/onert/test/ir
       - /runtime/onert/test/util
-      - /tests/nnapi/src
       - /tests/nnfw_api/src
-      - /tests/tools/tflite_run/src
 
     testFile:
       - extension: cpp
diff --git a/.gitattributes b/.gitattributes
index b8eec3d..d369854 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 tests/nnapi/specs/* linguist-detectable=false
+res/* linguist-detectable=false
diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml
new file mode 100644
index 0000000..bcbc3c5
--- /dev/null
+++ b/.github/workflows/check-format.yml
@@ -0,0 +1,64 @@
+name: Check code format
+
+on:
+  push:
+    branches:
+      - master
+      - release/*
+  pull_request:
+    branches:
+      - master
+      - release/*
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  check-format:
+    name: Check format
+    runs-on: ubuntu-20.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+
+      # C format: clang-format-8
+      # Python format: yapf==0.22.0
+      - name: Install packages
+        run: |
+          sudo apt-get install -y clang-format-8
+          python -m pip install --upgrade pip
+          pip install yapf==0.22.0
+
+      - name: Check
+        run: ./nnas format
+
+      # Upload patch file if failed
+      - name: Store archive
+        uses: actions/upload-artifact@v2
+        if: failure()
+        with:
+          name: format-patch
+          path: format.patch
+          retention-days: 3
+
+  check-copyright:
+    name: Check copyright
+    runs-on: ubuntu-20.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          # Fetch all history and branch (default: 1)
+          # Require all history to get file creation date
+          fetch-depth: 0
+
+      - name: Check copyright
+        run: ./nnfw copyright-check
diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml
new file mode 100644
index 0000000..38c76dc
--- /dev/null
+++ b/.github/workflows/check-pr-commit.yml
@@ -0,0 +1,51 @@
+name: Check PR commit
+
+on:
+  pull_request:
+    branches:
+      - master
+      - release/*
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  check-commit-message:
+    name: Check commit message
+    runs-on: ubuntu-20.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          # Check out the PR head commit
+          # (the checkout action uses the merge commit by default)
+          ref: ${{ github.event.pull_request.head.sha }}
+          # Fetch all history and branch (default: 1)
+          fetch-depth: 0
+
+      - name: Get commit body
+        run: |
+          git log origin/${GITHUB_BASE_REF}..HEAD --format=%b > commit_msg.txt
+          sed '/^$/d' commit_msg.txt > commit_body.txt
+
+      - name: Check signed-off
+        run: |
+          # Count commit body lines containing "Signed-off-by:"
+          count=$(cat commit_body.txt | grep 'Signed-off-by:' | wc -l)
+          if [[ ! "$count" -ge "1" ]]; then
+            exit 1
+          fi
+
+          echo "Signed-off-by is OK"
+
+      - name: Check body words
+        # Run this step even if the "Check signed-off" step failed
+        if: ${{ always() }}
+        run: |
+          count=$(cat commit_body.txt | sed '/Signed-off-by:/d' | wc -w)
+          echo "Commit body word check: $count words"
+          if [[ "$count" -lt "5" ]]; then
+            exit 1
+          fi
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
new file mode 100644
index 0000000..115d248
--- /dev/null
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(SOURCES
+        src/CircleExecutionPlan.cpp
+        src/ExecutionPlanner.cpp
+        src/ExecutionPlanner.h
+        )
+
+add_executable(circle_execution_plan "${SOURCES}")
+target_link_libraries(circle_execution_plan foder)
+target_link_libraries(circle_execution_plan safemain)
+target_link_libraries(circle_execution_plan luci_env)
+target_link_libraries(circle_execution_plan luci_import)
+target_link_libraries(circle_execution_plan luci_export)
+target_link_libraries(circle_execution_plan luci_plan)
+target_link_libraries(circle_execution_plan arser)
+
+install(TARGETS circle_execution_plan DESTINATION bin)
diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md
new file mode 100644
index 0000000..e789a55
--- /dev/null
+++ b/compiler/circle-execution-plan/README.md
@@ -0,0 +1,29 @@
+# circle-execution-plan
+
+The _circle-execution-plan_ tool annotates a model with an "execution plan".
+
+This tool takes a circle file as input and produces a modified circle file.
+The output circle file contains plan (`CircleNodeExecutionPlan`) information for every node.
+
+
+"execution plan" contains:
+- a number that determines the order in which nodes will be executed
+- memory offsets of the node's output tensors from the beginning of the shared memory buffer
+
+To record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`.
+For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table`, which maps each node ID (the key) to its encoded `CircleNodeExecutionPlan` data.
+
+### Execution plan building
+
+To build the "execution plan" we use the `ExecutionPlanner` class.
+The main method is `get_execution_plan()`, which computes the "execution plan" for each node
+and writes it to the node's annotations. This is done in two steps:
+- determining the execution order of the nodes, which is stored in the `_ordered_nodes` vector.
+Currently there is only one (default) method for this, `get_default_execution_order_plan()`, which uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
+  In the future we can add new methods to find the most suitable graph traversal.
+  
+- determining memory offsets for nodes from the beginning of the shared memory buffer, which are stored in `_offsets`.
+Currently there is one method for this, `get_offsets_with_greedy_by_size()`, which implements the "Greedy by Size" algorithm described in the https://arxiv.org/pdf/2001.03288.pdf article.
+  The main objective is to minimize the size of the allocated memory block.
+  In the future, other methods for determining memory offsets in the best way
+  may also be added here.
diff --git a/compiler/circle-execution-plan/requires.cmake b/compiler/circle-execution-plan/requires.cmake
new file mode 100644
index 0000000..76858f4
--- /dev/null
+++ b/compiler/circle-execution-plan/requires.cmake
@@ -0,0 +1,4 @@
+require(foder)
+require(safemain)
+require(luci)
+require(arser)
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
new file mode 100644
index 0000000..a54100b
--- /dev/null
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include "ExecutionPlanner.h"
+
+#include <arser/arser.h>
+
+#include <functional>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <cstdlib>
+
+// Entry point: loads a circle model, adds execution plan annotations and exports the result.
+int entry(int argc, char **argv)
+{
+  arser::Arser arser("circle_execution_plan provides model with execution plan meta information");
+  arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
+  arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    std::cout << arser;
+    return 255;
+  }
+
+  std::string input_path = arser.get<std::string>("input");
+  std::string output_path = arser.get<std::string>("output");
+  foder::FileLoader file_loader{input_path};
+  std::vector<char> model_data;
+  try
+  {
+    model_data = file_loader.load();
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  const circle::Model *circle_model = circle::GetModel(model_data.data());
+  if (circle_model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Import from input Circle file; stop here if no module was produced
+  luci::Importer importer;
+  auto module = importer.importModule(circle_model);
+  if (module == nullptr)
+  {
+    std::cerr << "ERROR: Failed to import '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Do main job
+  luci::ExecutionPlanner execution_planner(module->graph());
+  execution_planner.get_execution_plan();
+
+  // Export to output Circle file
+  luci::CircleExporter exporter;
+  luci::CircleFileExpContract contract(module.get(), output_path);
+  if (!exporter.invoke(&contract))
+  {
+    std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+    return 255;
+  }
+  return 0;
+}
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
new file mode 100644
index 0000000..c37d1e5
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionPlanner.h"
+#include <loco/IR/Algorithm.h>
+#include <luci/UserSettings.h>
+
+namespace luci
+{
+namespace
+{
+
+constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max();
+
+// Computes one spatial dimension of a convolution output for the given padding scheme.
+uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size,
+                             uint32_t stride, uint32_t dilation_rate = 1)
+{
+  const uint32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  if (padding == Padding::SAME)
+    return (image_size + stride - 1) / stride;
+  if (padding == Padding::VALID)
+    return (image_size + stride - effective_filter_size) / stride;
+  // Unsupported padding: trap in debug builds, and still return a value so that NDEBUG
+  // builds do not flow off the end of a non-void function (undefined behavior).
+  assert(false && "Unsupported padding");
+  return 0;
+}
+
+// Returns the size of the im2col temporary tensor for `conv`, or 0 when im2col is not needed.
+uint32_t compute_im2col_size(const luci::CircleConv2D *conv)
+{
+  auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+  auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+  auto padding = (conv->padding());
+  uint32_t stride_height = conv->stride()->h();
+  uint32_t stride_width = conv->stride()->w();
+
+  uint32_t dilation_height_factor = conv->dilation()->h();
+  uint32_t dilation_width_factor = conv->dilation()->w();
+
+  uint32_t filter_height = filter->dim(1).value();
+  uint32_t filter_width = filter->dim(2).value();
+  // im2col is skipped for S16 input and for a 1x1 filter with unit stride and unit dilation
+  const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
+  const bool need_non_dilated_im2col =
+    stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
+  bool need_im2col =
+    conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
+
+  if (!need_im2col)
+  {
+    return 0;
+  }
+  // The input is indexed as dim(0)=batch, dim(1)=height, dim(2)=width, dim(3)=depth
+  uint32_t input_depth = conv_input->dim(3).value();
+  uint32_t input_height = conv_input->dim(1).value();
+  uint32_t input_width = conv_input->dim(2).value();
+
+  uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height,
+                                               dilation_height_factor);
+  uint32_t output_width =
+    compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor);
+
+  uint32_t batches = conv_input->dim(0).value();
+
+  return batches * output_height * output_width * input_depth * filter_height * filter_width *
+         size(conv_input->dtype());
+}
+
+} // namespace
+
+// Computes node order and memory offsets, then attaches the resulting plan to every node.
+void ExecutionPlanner::get_execution_plan()
+{
+  get_default_execution_order_plan();
+  _required_size = get_offsets_with_greedy_by_size();
+  for (uint32_t order = 0; order < _ordered_nodes.size(); ++order)
+  {
+    auto *circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[order]);
+    luci::CircleNodeExecutionPlan node_plan(order, _offsets[order]);
+    luci::add_execution_plan(circle_node, node_plan);
+  }
+  luci::UserSettings::settings()->set(luci::UserSettings::Key::ExecutionPlanGen, true);
+}
+
+void ExecutionPlanner::get_default_execution_order_plan()
+{
+  // Default order: post-order traversal starting from the graph output nodes
+  _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
+}
+
+// Fills _alloc_node/_dealloc_node: for every node in _ordered_nodes, the execution step at
+// which its tensor is first needed and the step of its last user.
+void ExecutionPlanner::get_usage_interval()
+{
+  const size_t node_count = _ordered_nodes.size();
+  // Initialize vectors of first and last nodes for usage interval
+  _alloc_node.assign(node_count, nodeNotAssigned);
+  _dealloc_node.assign(node_count, nodeNotAssigned);
+
+  // Number of not yet visited users of every node
+  std::vector<int> usages_counts(node_count, 0);
+
+  // Position of a node in _ordered_nodes, or node_count when the node is not part of the plan
+  auto index_of = [this](const loco::Node *node) -> size_t {
+    auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), node);
+    return std::distance(_ordered_nodes.begin(), it);
+  };
+
+  auto allocate = [this](uint32_t node, uint32_t tensor) {
+    if (_alloc_node[tensor] != nodeNotAssigned)
+      return; // keep the earliest allocation step
+    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    _alloc_node[tensor] = node;
+  };
+
+  auto deallocate = [this](uint32_t node, uint32_t tensor) {
+    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    _dealloc_node[tensor] = node;
+  };
+
+  // Graph outputs get an extra usage, so their counter never drops to zero
+  for (auto &output_node : output_nodes(_graph))
+  {
+    const size_t index = index_of(output_node);
+    if (index < node_count)
+      usages_counts[index]++;
+  }
+
+  // Graph inputs get an extra usage too and are allocated from the first step. An input that
+  // no output depends on is not in _ordered_nodes and must be skipped (out-of-range index).
+  for (auto &input_node : input_nodes(_graph))
+  {
+    const size_t index = index_of(input_node);
+    if (index >= node_count)
+      continue;
+    usages_counts[index]++;
+    allocate(0, index);
+  }
+
+  // Increase refcounts of usage for all nodes in _ordered_nodes vector
+  for (const auto node : _ordered_nodes)
+  {
+    for (auto &prev_node : preds(node))
+      usages_counts[index_of(prev_node)]++;
+  }
+
+  // Walk the nodes in execution order: allocate each node at its own step (constants at
+  // step 0) and release a predecessor once its last user has been visited
+  for (uint32_t i = 0; i < node_count; i++)
+  {
+    const auto node = _ordered_nodes.at(i);
+    if (dynamic_cast<const luci::CircleConst *>(node) != nullptr)
+      allocate(0, i);
+    allocate(i, i);
+
+    for (auto &prev_node : preds(node))
+    {
+      const size_t index = index_of(prev_node);
+      usages_counts[index]--;
+      if (usages_counts[index] == 0)
+        deallocate(i, index);
+    }
+  }
+}
+
+uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
+{
+  get_usage_interval();
+  auto required_size = greedy_by_size_approach();
+  // Collect the offsets computed above into _offsets, one vector per node of _ordered_nodes
+  _offsets.resize(_ordered_nodes.size());
+  for (const auto &alloc : _alloc_node_inform_vector)
+  {
+    // Fill offsets vector: the offset of the node itself goes first, followed by the offsets
+    // of its temporary tensors
+    if (alloc.is_temp)
+    {
+      _offsets[alloc.node_num].push_back(alloc.offset);
+    }
+    else
+    {
+      _offsets[alloc.node_num].insert(_offsets[alloc.node_num].begin(), alloc.offset);
+    }
+  }
+  return required_size;
+}
+
+uint32_t ExecutionPlanner::greedy_by_size_approach()
+{
+  size_t result_size = 0;
+  create_alloc_node_inform_vector(false, false, false);
+  std::vector<AllocationNodeInformation> ordered_alloc_inform;
+  for (auto &current_node : _alloc_node_inform_vector)
+  {
+    if (current_node.size == 0)
+    {
+      current_node.offset = 0;
+      continue;
+    }
+    const uint32_t offsetNotAssigned = std::numeric_limits<uint32_t>::max();
+    size_t best_offset = offsetNotAssigned;
+    uint32_t best_offset_fit = offsetNotAssigned;
+    // Upper end of the overlapping allocations visited so far
+    uint32_t current_offset = 0;
+    // Scan the already placed tensors in ascending offset order, looking for the tightest gap
+    for (const auto &alloc_inform : ordered_alloc_inform)
+    {
+      if ((alloc_inform.last_node < current_node.first_node ||
+           alloc_inform.first_node > current_node.last_node))
+      {
+        continue; // usage intervals do not intersect
+      }
+      // Remember the smallest gap in front of a placed tensor that can hold the current one
+      if (current_offset + current_node.size <= alloc_inform.offset &&
+          alloc_inform.offset - current_offset < best_offset_fit)
+      {
+        best_offset = current_offset;
+        best_offset_fit = alloc_inform.offset - current_offset;
+      }
+      current_offset = std::max(current_offset, alloc_inform.offset + alloc_inform.size);
+    }
+    if (best_offset == offsetNotAssigned)
+    {
+      best_offset = current_offset;
+    }
+
+    result_size = std::max(result_size, best_offset + current_node.size);
+    current_node.offset = best_offset;
+    // Keep ordered_alloc_inform sorted by offset (operator< compares offsets)
+    auto insertion_it =
+      std::upper_bound(ordered_alloc_inform.begin(), ordered_alloc_inform.end(), current_node);
+    ordered_alloc_inform.insert(insertion_it, current_node);
+  }
+  return result_size;
+}
+
+void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
+                                                       bool null_im2col)
+{
+  auto node_compare = [this](const AllocationNodeInformation &alloc_1,
+                             const AllocationNodeInformation &alloc_2) {
+    auto idx1 = alloc_1.node_num;
+    auto idx2 = alloc_2.node_num;
+    // Tensors allocated at step 0 and never deallocated come first, ordered by node number
+    if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned)
+    {
+      if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+      {
+        return idx1 < idx2;
+      }
+      return true;
+    }
+    if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+    {
+      return false;
+    }
+    // The rest are ordered by decreasing size, ties broken by the earlier allocation step
+    auto size_1 = alloc_1.size;
+    auto size_2 = alloc_2.size;
+
+    if (size_1 != size_2)
+    {
+      return size_1 > size_2;
+    }
+    return this->_alloc_node[idx1] < this->_alloc_node[idx2];
+  };
+
+  _alloc_node_inform_vector.resize(_ordered_nodes.size());
+  // One entry per node; entries for im2col temporaries are appended after them
+  for (size_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]);
+    auto node_size = 1;
+    for (uint32_t axis = 0; axis < circle_node->rank(); ++axis)
+    {
+      node_size *= circle_node->dim(axis).value();
+    }
+    node_size *= size(circle_node->dtype());
+
+    _alloc_node_inform_vector[i].node_num = i;
+    _alloc_node_inform_vector[i].first_node = _alloc_node[i];
+    _alloc_node_inform_vector[i].last_node = _dealloc_node[i];
+    // The null_* flags force a zero size for the corresponding kind of tensor
+    const auto *const_node = dynamic_cast<const luci::CircleConst *>(circle_node);
+    if (i == 0 && null_inputs)
+    {
+      _alloc_node_inform_vector[i].size = 0;
+    }
+    else if (const_node && null_consts)
+    {
+      _alloc_node_inform_vector[i].size = 0;
+    }
+    else
+    {
+      _alloc_node_inform_vector[i].size = node_size;
+    }
+
+    // Conv2D may need an extra im2col temporary tensor; it gets its own allocation entry
+    auto opcode = circle_node->opcode();
+    if (opcode == luci::CircleOpcode::CONV_2D)
+    {
+      auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+      auto im2col_size = compute_im2col_size(conv);
+      if (im2col_size > 0)
+      {
+        AllocationNodeInformation temp_alloc;
+
+        if (null_im2col)
+        {
+          temp_alloc.size = 0;
+        }
+        else
+        {
+          temp_alloc.size = im2col_size;
+        }
+        // The temporary's usage interval spans from the previous node to the next one
+        temp_alloc.first_node = i - 1;
+        temp_alloc.last_node = i + 1;
+        temp_alloc.node_num = i;
+        temp_alloc.is_temp = true;
+
+        _alloc_node_inform_vector.push_back(temp_alloc);
+        _alloc_node.push_back(i);
+        _dealloc_node.push_back(i);
+      }
+    }
+  }
+  // Sort _alloc_node_inform_vector with node_compare for the greedy by size approach.
+  std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), node_compare);
+}
+
+void ExecutionPlanner::dump_inform()
+{
+  uint32_t max_breadth = 0;
+  // Breadth of node i = total size of the non-temporary tensors whose interval contains step i
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    auto current_node_it = std::find_if(
+      _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+      [this, i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
+    for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
+    {
+      auto first_node = _alloc_node[j];
+      auto last_node = _dealloc_node[j];
+      // Entry describing node j itself (not one of its temporaries)
+      auto it = std::find_if(
+        _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+        [this, j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
+      if (i >= first_node && i <= last_node)
+      {
+        current_node_it->breadth += it->size;
+      }
+    }
+    if (max_breadth < current_node_it->breadth)
+    {
+      max_breadth = current_node_it->breadth;
+    }
+    // Print the allocation summary of node i
+    auto node = loco::must_cast<luci::CircleNode *>(_ordered_nodes.at(i));
+    printf("node_num = %d   node_name = %s    node_size = %d    node_offset = %d  node_breadth = "
+           "%u node_first_node = %d   node_last_node = %d\n",
+           i, node->name().c_str(), current_node_it->size, current_node_it->offset,
+           current_node_it->breadth, current_node_it->first_node, current_node_it->last_node);
+  }
+  printf("Lower bound is = %u\n", max_breadth);
+  std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+            [](const AllocationNodeInformation &first, const AllocationNodeInformation &second) {
+              if (first.breadth != second.breadth)
+                return first.breadth > second.breadth;
+              return first.node_num < second.node_num;
+            }); // NOTE: leaves _alloc_node_inform_vector sorted by breadth, largest first
+}
+
+} // namespace luci
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
new file mode 100644
index 0000000..8e3d9b4
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLANNER_H
+#define CIRCLE_EXECUTION_PLANNER_H
+
+#include <luci/IR/Module.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci
+{
+// Additional per-node information that helps build the allocation plan:
+// where the node lives in the buffer and over which nodes it is in use.
+struct AllocationNodeInformation
+{
+  // Starts with zero offset/size/breadth and is_temp == false. The index fields are
+  // initialized with -1, which wraps to the largest uint32_t value.
+  AllocationNodeInformation()
+    : offset(0), size(0), node_num(-1), first_node(-1), last_node(-1), is_temp(false),
+      breadth(0)
+  {
+  }
+
+  // Orders records by ascending buffer offset.
+  bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
+
+  // Memory offset from the beginning of the buffer.
+  uint32_t offset;
+  // Size required by the node.
+  uint32_t size;
+  // Number assigned to the node.
+  uint32_t node_num;
+  // node_num of the node at which the current node is first used.
+  // Marks the start of the current node's usage interval.
+  uint32_t first_node;
+  // node_num of the node at which the current node is last used.
+  // Marks the end of the current node's usage interval.
+  uint32_t last_node;
+  // Whether the current node is temporary.
+  bool is_temp;
+  // Operation breadth of the current node
+  // (accumulated in ExecutionPlanner::dump_inform()).
+  uint32_t breadth;
+};
+
+class ExecutionPlanner // Plans execution order and memory offsets for the nodes of one graph.
+{
+public:
+  ExecutionPlanner() = delete;
+  explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; };
+
+  // Method provides the execution plan, which contains the execution order and
+  // memory offsets for all nodes in _graph.
+  // The plan is written into the nodes' annotations with the help of CircleNodeExecutionPlan.
+  void get_execution_plan();
+
+private:
+  // Method gets the default execution order plan and saves it in the _ordered_nodes vector.
+  // There can be different variants of execution order; this method provides the main one.
+  void get_default_execution_order_plan();
+
+  // Method provides nodes with usage interval information.
+  void get_usage_interval();
+
+  // Method dumps execution plan information.
+  void dump_inform();
+
+  // Method finds the required offsets for all nodes from _ordered_nodes, using the greedy by
+  // size approach. It saves the offsets in the _offsets vector.
+  // Return: required size of the buffer.
+  uint32_t get_offsets_with_greedy_by_size();
+
+  // Implementation of the greedy by size approach to find offsets for nodes.
+  uint32_t greedy_by_size_approach();
+
+  // Method creates _alloc_node_inform_vector and fills it with usage intervals and node sizes.
+  // null_consts = true - sizes of const nodes are set to 0;
+  // null_inputs = true - sizes of input nodes are set to 0;
+  // null_im2col = true - sizes of im2col nodes are set to 0;
+  // Use these flags to leave input, const or im2col nodes out of account
+  // when determining offsets and calculating the required buffer size. This is used for
+  // experiments.
+  void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
+                                       bool null_im2col = false);
+
+  // Stores additional allocation information for all the nodes from _graph.
+  std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
+
+  // Stores nodes in execution order.
+  std::vector<loco::Node *> _ordered_nodes;
+
+  // Stores the nodes' memory offsets in the arena buffer.
+  std::vector<std::vector<uint32_t>> _offsets;
+
+  // For every node of _ordered_nodes, stores the position (in _ordered_nodes) of the node
+  // at which it is first used.
+  // For example, if the i'th position of _alloc_node stores the value j, then
+  // the node at the j'th position of _ordered_nodes is where we should allocate (first use)
+  // the node at the i'th position of _ordered_nodes.
+  std::vector<uint32_t> _alloc_node;
+
+  // For every node of _ordered_nodes, stores the position (in _ordered_nodes) of the node
+  // at which it is last used.
+  // For example, if the i'th position of _dealloc_node stores the value j, then
+  // the node at the j'th position of _ordered_nodes is where we can deallocate (last use)
+  // the node at the i'th position of _ordered_nodes.
+  std::vector<uint32_t> _dealloc_node;
+
+  loco::Graph *_graph;
+
+  // Required memory size.
+  uint32_t _required_size = 0;
+};
+
+} // namespace luci
+
+#endif // CIRCLE_EXECUTION_PLANNER_H
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 1a09a8a..57ac30a 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -88,24 +88,24 @@ int entry(int argc, char **argv)
     .type(arser::DataType::STR_VEC)
     .required(false)
     .help("Quantize-dequantize weight values required action before quantization. "
-          "Three arguments required: input_dtype(float32) "
-          "output_dtype(uint8) granularity(layer, channel)");
+          "Three arguments required: input_model_dtype(float32) "
+          "output_model_dtype(uint8) granularity(layer, channel)");
 
   arser.add_argument(qwmm)
     .nargs(3)
     .type(arser::DataType::STR_VEC)
     .required(false)
     .help("Quantize with min/max values. "
-          "Three arguments required: input_dtype(float32) "
-          "output_dtype(uint8) granularity(layer, channel)");
+          "Three arguments required: input_model_dtype(float32) "
+          "output_model_dtype(uint8) granularity(layer, channel)");
 
   arser.add_argument(rq)
     .nargs(2)
     .type(arser::DataType::STR_VEC)
     .required(false)
     .help("Requantize a quantized model. "
-          "Two arguments required: input_dtype(int8) "
-          "output_dtype(uint8)");
+          "Two arguments required: input_model_dtype(int8) "
+          "output_model_dtype(uint8)");
 
   arser.add_argument(fq)
     .nargs(3)
@@ -116,6 +116,18 @@ int entry(int argc, char **argv)
           "Three arguments required: tensor_name(string), "
           "scale(float) zero_point(int)");
 
+  arser.add_argument("--input_type")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .help("Input type of quantized model (uint8 or int16)");
+
+  arser.add_argument("--output_type")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .help("Output type of quantized model (uint8 or int16)");
+
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
 
@@ -163,8 +175,8 @@ int entry(int argc, char **argv)
     }
     options->enable(Algorithms::QuantizeDequantizeWeights);
 
-    options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
-    options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
     options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
   }
 
@@ -178,9 +190,17 @@ int entry(int argc, char **argv)
     }
     options->enable(Algorithms::QuantizeWithMinMax);
 
-    options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
-    options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
     options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+    if (arser["--input_type"])
+      options->param(AlgorithmParameters::Quantize_input_type,
+                     arser.get<std::string>("--input_type"));
+
+    if (arser["--output_type"])
+      options->param(AlgorithmParameters::Quantize_output_type,
+                     arser.get<std::string>("--output_type"));
   }
 
   if (arser[rq])
@@ -193,8 +213,8 @@ int entry(int argc, char **argv)
     }
     options->enable(Algorithms::Requantize);
 
-    options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
-    options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
   }
 
   if (arser[fq])
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 0000000..771974a
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,56 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 0000000..21e6329
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+                             const T2 *axis, const tflite::RuntimeShape &output_shape,
+                             T3 *output_data, const std::greater<T1> cmp)
+{ // Forwards every argument unchanged to tflite::reference_ops::ArgMinMax.
+  tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 0000000..4dd77ff
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::BatchToSpaceND.
+  tflite::reference_ops::BatchToSpaceND(
+    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 0000000..0a8ae4e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &im2col_shape,
+                        float *im2col_data)
+{ // Float overload: runs tflite::reference_ops::Conv without the caller's im2col buffer.
+  (void)im2col_shape; // not forwarded: an empty RuntimeShape is passed instead
+  (void)im2col_data; // not forwarded: nullptr is passed instead
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                        uint8 *im2col_data)
+{
+  // uint8 overload: im2col_shape/im2col_data are handed to the reference kernel as-is. The
+  // trailing nullptr fills its last parameter (presumably the CPU backend context - confirm).
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
+                              im2col_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                                  int8 *im2col_data)
+{ // int8 path: delegates to tflite::reference_integer_ops::ConvPerChannel.
+  (void)im2col_shape; // accepted for signature parity only; not passed to the kernel
+  (void)im2col_data; // accepted for signature parity only; not passed to the kernel
+  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                filter_shape, filter_data, bias_shape, bias_data,
+                                                output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 0000000..8463e57
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::DepthToSpace.
+  tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 0000000..4089d0a
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+                       const tflite::RuntimeShape &output_shape, float *output_data)
+{ // Float-only wrapper: forwards every argument unchanged to tflite::reference_ops::Elu.
+  tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 0000000..f84742a
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+                                   const tflite::RuntimeShape &input_shape, const T *input_data,
+                                   const tflite::RuntimeShape &output_shape, T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::L2Normalization.
+  tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+                                         output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 0000000..38a302f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+                          const T *input_data, const tflite::RuntimeShape &output_shape,
+                          T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::L2Pool.
+  tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 0000000..9ccd222
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+                             const tflite::RuntimeShape &input_shape, const float *input_data,
+                             const tflite::RuntimeShape &output_shape, float *output_data)
+{ // Float-only wrapper: forwards every argument unchanged to tflite::reference_ops::LeakyRelu.
+  tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 0000000..2b46b10
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const float *input2_data, const tflite::RuntimeShape &output_shape,
+                       float *output_data)
+{ // Same call as BroadcastMul4DSlow below: always uses the broadcasting reference kernel.
+  tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+                                      const tflite::RuntimeShape &input1_shape,
+                                      const float *input1_data,
+                                      const tflite::RuntimeShape &input2_shape,
+                                      const float *input2_data,
+                                      const tflite::RuntimeShape &output_shape, float *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::BroadcastMul4DSlow.
+  tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 0000000..be5903a
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+                          const tflite::RuntimeShape &output_shape, T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::Negate.
+  tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 0000000..cc9f0fd
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+               const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // Forwards every argument unchanged to tflite::reference_ops::ResizeBilinear.
+  tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+                                        output_size_shape, output_size_data,
+                                        unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 0000000..f4d5a6e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // PAL entry point: forwards every argument unchanged to the TFLite reference kernel.
+  tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+                                               output_size_shape, output_size_data,
+                                               unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 0000000..6bbda48
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  // Deliberately empty for this PAL; the casts only mark the parameters as intentionally unused.
+  static_cast<void>(data);
+  static_cast<void>(input_scale);
+  static_cast<void>(beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  // Passed as the integer-bits argument to both tflite helpers called below.
+  static const int kScaledDiffIntegerBits = 5;
+  int32 multiplier;
+  int left_shift;
+  tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, &multiplier,
+                                   &left_shift);
+
+  params->input_multiplier = multiplier;
+  params->input_left_shift = left_shift;
+  params->diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+                           const tflite::RuntimeShape &input_shape, const T *input_data,
+                           const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // Generic fallback: not implemented for this PAL yet, only the int8_t specialization is.
+  static_cast<void>(params);
+  static_cast<void>(input_shape);
+  static_cast<void>(input_data);
+  static_cast<void>(output_shape);
+  static_cast<void>(output_data);
+  assert(false && "Softmax NYI");
+}
+
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+                            const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                            const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  // The last dimension is one softmax row; all remaining dimensions are flattened into rows.
+  const int last_dim = input_shape.DimensionsCount() - 1;
+  const int num_rows = tflite::MatchingFlatSizeSkipDim(input_shape, last_dim, output_shape);
+  const int row_size = tflite::MatchingDim(input_shape, last_dim, output_shape, last_dim);
+
+  // Hand the fixed-point parameters stored in `params` to the CMSIS-NN s8 kernel.
+  arm_softmax_s8(input_data, num_rows, row_size, params.input_multiplier,
+                 params.input_left_shift, params.diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 0000000..fdddaa9
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // PAL entry point: forwards every argument unchanged to the TFLite reference kernel.
+  tflite::reference_ops::SpaceToBatchND(
+    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 0000000..816b7f6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{ // PAL entry point: forwards every argument unchanged to the TFLite reference kernel.
+  tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 0000000..ea57578
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+                       const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                       const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                       const tflite::RuntimeShape &output_shape, T *output_data)
+{ // PAL entry point: forwards every argument unchanged to the TFLite reference kernel.
+  tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+                             output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 0000000..9a25a3c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,62 @@
+macro(initialize_pal)
+    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+    nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+    # NOTE: this is a macro, so every return() below leaves the *calling* scope, not just this body.
+    if (NOT TensorFlowSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowGEMMLowpSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowEigenSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Eigen not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowRuySource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Ruy not found")
+        return()
+    endif ()
+
+    if (NOT CMSISSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+        return()
+    endif ()
+    # Reached only when every source package requested above was found.
+    set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+    target_include_directories(${TGT} PRIVATE "${PAL}")
+    target_include_directories(${TGT} PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}")
+    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+    # Static helper library built from TFLite's quantization_util.cc.
+    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+    add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+    set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+    )
+    # CMSIS headers are PUBLIC: PAL headers compiled into ${TGT} include <arm_nnfunctions.h>.
+    add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+    target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+            "${CMSISSource_DIR}/CMSIS/NN/Include"
+            "${CMSISSource_DIR}/CMSIS/DSP/Include"
+            "${CMSISSource_DIR}/CMSIS/Core/Include")
+    # Linking is what hands the PUBLIC include directories above over to ${TGT}.
+    target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index fb5e063..5647f4c 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -129,6 +129,17 @@ void Conv2D::configure()
     auto im2col = getOutputTensors()[1];
     im2col->set_allocatable(false);
   }
+
+  switch (_params.activation)
+  {
+    case Activation::NONE:
+    case Activation::RELU:
+    case Activation::RELU6:
+    case Activation::RELU_N1_TO_1:
+      break;
+    default:
+      throw std::runtime_error("Unsupported fused activation");
+  }
 }
 
 void Conv2D::execute() const
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index 277c280..0fe6ef7 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -663,6 +663,45 @@ TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  // TANH is not a fused activation that Conv2D::configure() accepts, so configure() must throw.
+  Conv2DParams conv_params{};
+  conv_params.padding = Padding::VALID;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.activation = Activation::TANH;
+
+  Shape input_dims{1, 4, 3, 2};
+  Shape filter_dims{2, 2, 2, 2};
+  Shape bias_dims{2};
+  std::vector<float> input_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_values{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_dims, input_values, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_dims, filter_values, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_dims, bias_values, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, conv_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index 6e83e37..586cfa1 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -32,7 +32,6 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
   switch (activation)
   {
     case Activation::NONE:
-    case Activation::TANH:
       *activation_min = std::numeric_limits<float>::lowest();
       *activation_max = std::numeric_limits<float>::max();
       break;
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index 974283a..2cde99f 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -23,7 +23,7 @@ target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER
 
 target_link_libraries(${LUCI_INTERPRETER_LOADER}
         PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
-        PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common)
+        PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
 
 if(NOT ENABLE_TEST)
   return()
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index b55e7c5..a14442e 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -18,6 +18,7 @@
 
 #include "loader/KernelBuilder.h"
 
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 #include <loco/IR/Algorithm.h>
 
 namespace luci_interpreter
@@ -155,6 +156,15 @@ void GraphLoader::loadTensors()
     auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
                                            node->name());
 
+    // If node has execution plan then read memory offsets for nodes
+    // from the beginning of shared memory buffer. Used in Static Memory Manager.
+    if (luci::has_execution_plan(node))
+    {
+      auto execution_plan = luci::get_execution_plan(node);
+      assert(!execution_plan.offsets().empty());
+      tensor->set_offset(execution_plan.offsets().front());
+    }
+
     if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
     {
       size_t data_size{};
@@ -199,16 +209,54 @@ void GraphLoader::loadOperators()
   KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
 
   // Create kernels for executable nodes. This has to be done in execution order.
-  for (const loco::Node *loco_node :
-       loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph))))
+  auto graph = const_cast<loco::Graph *>(_graph);
+
+  auto const graph_nodes = loco::all_nodes(graph);
+
+  // Checking for execution plan in node annotations.
+  bool has_execution_annotation = true;
+  auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+    const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+    if (!luci::has_execution_plan(circle_node))
+      has_execution_annotation = false;
+  };
+  std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+  if (has_execution_annotation)
   {
-    const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+    // Build ordered_nodes vector that stores the order of execution of graph nodes.
+    std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
 
-    if (isExecutableNode(node))
+    auto const filler = [&ordered_nodes](auto const node) {
+      const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+      auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+      ordered_nodes.at(position) = circle_node;
+    };
+    std::for_each(begin(graph_nodes), end(graph_nodes), filler);
+
+    for (auto node : ordered_nodes)
     {
-      std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
-      _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
-      _runtime_graph->addKernel(std::move(kernel));
+      if (isExecutableNode(node))
+      {
+        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+        _runtime_graph->addKernel(std::move(kernel));
+      }
+    }
+  }
+  else
+  {
+    // If it is impossible to build the execution order plan,
+    // then we use the default postorder_traversal approach.
+    for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
+    {
+      const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+      if (isExecutableNode(node))
+      {
+        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+        _runtime_graph->addKernel(std::move(kernel));
+      }
     }
   }
 }
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index 71c8ef3..22fd1ac 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -17,6 +17,7 @@
 #include "Builders.h"
 
 #include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 
 namespace luci_interpreter
 {
@@ -31,13 +32,25 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
 
   const Tensor *input = helper.getInputTensor(node->input());
   const Tensor *filter = helper.getInputTensor(node->filter());
-  const Tensor *bias = helper.getInputTensor(node->bias());
+  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
   Tensor *output = helper.getOutputTensor(node);
 
   auto im2col =
     std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
   im2col->set_observable(false);
   im2col->set_data_buffer(nullptr);
+  // If node has execution plan then read memory offsets for im2col temporary tensor
+  // from the beginning of shared memory buffer.
+  // Used in Static Memory Manager.
+  // TODO move tensors offset initialization to one place
+  if (luci::has_execution_plan(node))
+  {
+    const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleConv2D temporary was found.
+    if (execution_plan.offsets().size() > 1)
+      // If this is true, then we keep this offset in im2col.
+      im2col->set_offset(execution_plan.offsets().at(1));
+  }
   Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
 
   Conv2DParams params{};
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
index d936e12..9434708 100644
--- a/compiler/luci-micro/CMakeLists.txt
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -13,11 +13,12 @@ endif()
 set(CMAKE_ARM_OPTIONS
   -DLUCI_INTERPRETER_STATIC=ON
   -DLUCI_STATIC=ON
-  "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake"
+  -DBUILD_CMSIS_NN_FUNCTIONS=ON
+  -DTARGET_CPU=cortex-m7
+  "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake"
   "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
   "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
   "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
-  -DCPU_ARCH=arm
   -DC_COMPILER=${ARM_C_COMPILER}
   -DCXX_COMPILER=${ARM_CXX_COMPILER}
   -DASM_COMPILER=${ARM_ASM_COMPILER}
diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake
deleted file mode 100644
index 2d23b5d..0000000
--- a/compiler/luci-micro/standalone/Toolchain.cmake
+++ /dev/null
@@ -1,8 +0,0 @@
-set(CMAKE_SYSTEM_NAME Generic)
-
-set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
-set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
-set(CMAKE_C_COMPILER "${C_COMPILER}")
-set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
-set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
-set(CMAKE_OBJCOPY "${OBJCOPY}")
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
index 9dcf1b5..b92eefb 100644
--- a/compiler/luci/CMakeLists.txt
+++ b/compiler/luci/CMakeLists.txt
@@ -4,9 +4,9 @@
 #
 # Currently this feature is used for luci-interpreter MCU builds.
 if (STATIC_LUCI)
-  set(LIBRARY_TYPE "STATIC")
+  set(LUCI_LIBRARY_TYPE "STATIC")
 else()
-  set(LIBRARY_TYPE "SHARED")
+  set(LUCI_LIBRARY_TYPE "SHARED")
 endif()
 
 add_subdirectory(env)
diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt
index bba5155..7025db2 100644
--- a/compiler/luci/env/CMakeLists.txt
+++ b/compiler/luci/env/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_env ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_env ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_env PUBLIC include)
 target_link_libraries(luci_env PRIVATE nncc_common)
 install(TARGETS luci_env DESTINATION lib)
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
index 2b41a62..a267d0e 100644
--- a/compiler/luci/export/CMakeLists.txt
+++ b/compiler/luci/export/CMakeLists.txt
@@ -1,13 +1,12 @@
 file(GLOB_RECURSE SOURCES "src/*.cpp")
-# TODO enable tests
-#file(GLOB_RECURSE TESTS "src/*.test.cpp")
-#list(REMOVE_ITEM SOURCES ${TESTS})
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-    set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+    set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_export ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_export ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_export PRIVATE src)
 target_include_directories(luci_export PUBLIC include)
 target_link_libraries(luci_export PRIVATE luci_lang)
@@ -26,13 +25,17 @@ install(TARGETS luci_export DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
 
-#if(NOT ENABLE_TEST)
-#  return()
-#endif(NOT ENABLE_TEST)
-#
-#nnas_find_package(GTest REQUIRED)
-#
-#GTest_AddTest(luci_export_test ${TESTS})
-#target_include_directories(luci_export_test PRIVATE src)
-#target_link_libraries(luci_export_test luci_export)
-#target_link_libraries(luci_export_test oops)
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_export_test ${TESTS})
+target_include_directories(luci_export_test PRIVATE src)
+target_link_libraries(luci_export_test luci_export)
+target_link_libraries(luci_export_test luci_plan)
+target_link_libraries(luci_export_test luci_lang)
+target_link_libraries(luci_export_test mio_circle)
+target_link_libraries(luci_export_test luci_env)
+target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/src/CircleExporter.test.cpp b/compiler/luci/export/src/CircleExporter.test.cpp
new file mode 100644
index 0000000..5898f9d
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporter.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleExporter.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/IR/Nodes/CircleRelu.h>
+#include <luci/UserSettings.h>
+
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
+
+#include <gtest/gtest.h>
+
+// Exporter contract used by the tests in this file. It owns a three-node graph
+// (input -> relu -> output) and keeps the bytes passed to store() in memory.
+class SampleGraphContract : public luci::CircleExporter::Contract
+{
+public:
+  SampleGraphContract() : luci::CircleExporter::Contract(), _buffer(new std::vector<char>)
+  {
+    // Graph-level input/output descriptors and the three nodes.
+    _g = loco::make_graph();
+    auto graph_input = _g->inputs()->create();
+    auto graph_output = _g->outputs()->create();
+    input_node = _g->nodes()->create<luci::CircleInput>();
+    output_node = _g->nodes()->create<luci::CircleOutput>();
+    relu_node = _g->nodes()->create<luci::CircleRelu>();
+
+    // Both graph descriptors use the same shape and element type.
+    graph_input->shape({1, 2, 3, 4});
+    graph_input->dtype(loco::DataType::FLOAT32);
+    graph_output->shape({1, 2, 3, 4});
+    graph_output->dtype(loco::DataType::FLOAT32);
+
+    // Input node, bound to the graph input.
+    input_node->name("input");
+    input_node->dtype(loco::DataType::FLOAT32);
+    input_node->index(graph_input->index());
+    // Relu node, fed by the input node.
+    relu_node->name("relu");
+    relu_node->features(input_node);
+    // Output node, fed by the relu node and bound to the graph output.
+    output_node->name("output");
+    output_node->from(relu_node);
+    output_node->index(graph_output->index());
+  }
+
+  loco::Graph *graph(void) const override { return _g.get(); }
+
+  // Replaces the captured bytes with [ptr, ptr + size) and always reports success.
+  bool store(const char *ptr, const size_t size) const override
+  {
+    _buffer->assign(ptr, ptr + size);
+    return true;
+  }
+
+  const std::vector<char> &get_buffer() { return *_buffer; }
+
+  luci::CircleInput *input_node;
+  luci::CircleOutput *output_node;
+  luci::CircleRelu *relu_node;
+
+private:
+  std::unique_ptr<loco::Graph> _g;
+  std::unique_ptr<std::vector<char>> _buffer;
+};
+
+TEST(CircleExport, export_execution_plan)
+{
+  SampleGraphContract contract;
+  uint32_t reference_order = 1;
+  uint32_t reference_offset = 100u;
+  luci::add_execution_plan(contract.relu_node,
+                           luci::CircleNodeExecutionPlan(reference_order, {reference_offset}));
+
+  luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, true);
+  luci::CircleExporter exporter;
+  exporter.invoke(&contract);
+
+  ASSERT_FALSE(contract.get_buffer().empty());
+  std::unique_ptr<circle::ModelT> model(circle::GetModel(contract.get_buffer().data())->UnPack());
+  ASSERT_NE(model.get(), nullptr);
+  // Check the containers before indexing them, so a missing table fails the test cleanly.
+  ASSERT_FALSE(model->metadata.empty());
+  ASSERT_EQ(model->metadata[0]->name, "ONE_execution_plan_table");
+  auto metadata_buffer = model->metadata[0]->buffer;
+  ASSERT_LT(metadata_buffer, model->buffers.size());
+  auto &buffer = model->buffers[metadata_buffer]->data;
+  ASSERT_EQ(buffer.size(), 20);
+  uint32_t *raw_table_contents = reinterpret_cast<uint32_t *>(buffer.data());
+
+  auto num_entries = raw_table_contents[0];
+  ASSERT_EQ(num_entries, 1);
+  auto node_id = raw_table_contents[1];
+  ASSERT_EQ(node_id, 1); // relu node is second (aka id 1) in topological sort in exporter
+  auto node_plan_size = raw_table_contents[2];
+  ASSERT_EQ(node_plan_size, 2); // 1 for execution order, 1 for memory offset value
+  auto node_plan_order = raw_table_contents[3];
+  ASSERT_EQ(node_plan_order, reference_order); // value given to CircleNodeExecutionPlan above
+  auto node_plan_offset = raw_table_contents[4];
+  ASSERT_EQ(node_plan_offset, reference_offset); // value given to CircleNodeExecutionPlan above
+}
+
+TEST(CircleExport, export_execution_plan_nosetting_NEG)
+{
+  // The node carries a plan, but with ExecutionPlanGen off no metadata is expected in the model.
+  luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, false);
+  SampleGraphContract contract;
+  const uint32_t order = 1;
+  const uint32_t offset = 100u;
+  luci::add_execution_plan(contract.relu_node, luci::CircleNodeExecutionPlan(order, {offset}));
+
+  luci::CircleExporter exporter;
+  exporter.invoke(&contract);
+
+  const auto &exported = contract.get_buffer();
+  ASSERT_FALSE(exported.empty());
+  std::unique_ptr<circle::ModelT> model(circle::GetModel(exported.data())->UnPack());
+  ASSERT_NE(model.get(), nullptr);
+  ASSERT_EQ(model->metadata.size(), 0);
+}
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 1df569d..6630cab 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_import ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_import ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_import PRIVATE src)
 target_include_directories(luci_import PUBLIC include)
 target_link_libraries(luci_import PUBLIC luci_lang)
@@ -33,3 +33,6 @@ GTest_AddTest(luci_import_test ${TESTS})
 target_include_directories(luci_import_test PRIVATE src)
 target_link_libraries(luci_import_test luci_import)
 target_link_libraries(luci_import_test oops)
+target_link_libraries(luci_import_test luci_plan)
+target_link_libraries(luci_import_test luci_lang)
+target_link_libraries(luci_import_test mio_circle)
diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h
index b9697fb..fb38ba9 100644
--- a/compiler/luci/import/include/luci/Import/CircleReader.h
+++ b/compiler/luci/import/include/luci/Import/CircleReader.h
@@ -36,10 +36,19 @@ namespace luci
 {
 
 bool is_valid(const circle::OperatorCodeT &opcode);
+bool is_valid(const circle::OperatorCode *opcode);
+
 bool is_custom(const circle::OperatorCodeT &opcode);
+bool is_custom(const circle::OperatorCode *opcode);
+
 std::string opcode_name(const circle::OperatorCodeT &opcode);
+std::string opcode_name(const circle::OperatorCode *opcode);
+
 const char *tensor_name(const circle::TensorT &tensor);
+const char *tensor_name(const circle::Tensor *tensor);
+
 const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor);
 
 loco::DataType luci_datatype(circle::TensorType type);
 FusedActFunc luci_actfunc(const circle::ActivationFunctionType type);
@@ -49,29 +58,70 @@ luci::CircleFullyConnected::WeightsFormat
 luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
 std::unique_ptr<CircleQuantParam>
 luci_quantparam(const circle::QuantizationParametersT *quantization);
+std::unique_ptr<CircleQuantParam>
+luci_quantparam(const circle::QuantizationParameters *quantization);
 
 /// @brief Copy common tensor attributes such as name, type, etc. to node.
 void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node);
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node);
+
+/**
+ * @brief Non-owning view of a flatbuffers::Vector pointer; a null pointer behaves as empty
+ */
+template <typename T> class VectorWrapper
+{
+public:
+  explicit VectorWrapper(const flatbuffers::Vector<T> *ptr); // ptr may be null
+
+  const T *data() const; // nullptr when the wrapped pointer is null
+  uint32_t size() const; // 0 when the wrapped pointer is null
+
+  using iterator = typename flatbuffers::Vector<T>::const_iterator;
+  iterator begin() const;
+  iterator end() const; // equals begin() when the wrapped pointer is null
+
+  using value_type = typename flatbuffers::Vector<T>::return_type;
+  value_type at(uint32_t i) const;         // throws std::range_error when i >= size()
+  value_type operator[](uint32_t i) const; // forwards to at(), same bounds check
+
+  bool null() const;  // true when the wrapped pointer is nullptr
+  bool empty() const; // true when size() == 0
+
+private:
+  const flatbuffers::Vector<T> *_vector; // stored as given, never freed by this class
+};
+
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+  return VectorWrapper<T>{vec}; // T is deduced from the pointer, so callers need not spell it
+}
 
 /**
  * @brief Loads Circle file and provides helpers to access attributes
  */
 class CircleReader
 {
-private:
+private: // unpack API
   using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
   using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
   using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
   using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
   using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
 
+private: // direct API
+  using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+  using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+  using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+  using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+  using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
+
   using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
   using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
 
 public:
   CircleReader() = default;
 
-public:
+public: // unpack API
   const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
   const CircleBuffers_t &buffers() const { return _model->buffers; }
   const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
@@ -89,6 +139,20 @@ public:
   circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
   std::string opcode_name(const circle::OperatorT &op) const;
 
+public: // direct API
+  CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); }
+  CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); }
+  CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); }
+  CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); }
+  VectorWrapper<int32_t> native_inputs() const { return wrap(_native_subgraph->inputs()); }
+  VectorWrapper<int32_t> native_outputs() const { return wrap(_native_subgraph->outputs()); }
+  std::string native_name() const { return _native_subgraph->name()->str(); }
+  circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); }
+  CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); }
+
+  circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+  std::string opcode_name(const circle::Operator *op) const;
+
 public:
   bool parse(const circle::Model *model);
   bool select_subgraph(uint32_t subgraph);
@@ -97,8 +161,9 @@ private:
   std::unique_ptr<const circle::ModelT> _model;
   const circle::SubGraphT *_current_subgraph{nullptr};
 
-  const circle::Model *_model_ptr{nullptr};
+  const circle::Model *_native_model{nullptr};
   const CircleTensorsPtr_t *_tensors_ptr{nullptr};
+  const circle::SubGraph *_native_subgraph{nullptr};
 };
 
 } // namespace luci
diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp
index 6c9bf3a..14917ba 100644
--- a/compiler/luci/import/src/CircleReader.cpp
+++ b/compiler/luci/import/src/CircleReader.cpp
@@ -29,12 +29,26 @@ bool is_valid(const circle::OperatorCodeT &opcode)
   return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
 }
 
+bool is_valid(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  const auto builtin = opcode->builtin_code(); // valid iff inside the generated enum range
+  return !(builtin < circle::BuiltinOperator_MIN || circle::BuiltinOperator_MAX < builtin);
+}
+
 bool is_custom(const circle::OperatorCodeT &opcode)
 {
   circle::BuiltinOperator code = opcode.builtin_code;
   return (code == circle::BuiltinOperator_CUSTOM);
 }
 
+bool is_custom(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  // Custom operators are the ones whose builtin code is BuiltinOperator_CUSTOM
+  return circle::BuiltinOperator_CUSTOM == opcode->builtin_code();
+}
+
 std::string opcode_name(const circle::OperatorCodeT &opcode)
 {
   if (!is_valid(opcode))
@@ -56,6 +70,30 @@ std::string opcode_name(const circle::OperatorCodeT &opcode)
   return circle::EnumNameBuiltinOperator(code);
 }
 
+// Name of an operator code read through the direct API: "(invalid)" when the builtin code is
+// out of range, the custom code string for CUSTOM operators ("(invalid custom)" when that
+// string is missing or empty), otherwise the builtin operator's enum name.
+std::string opcode_name(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+
+  if (!is_valid(opcode))
+    return "(invalid)";
+
+  if (is_custom(opcode))
+  {
+    // custom_code is optional in the schema: the accessor returns nullptr when the model
+    // omits it, so test the pointer before reading the string
+    const auto *custom_code = opcode->custom_code();
+    if (custom_code == nullptr || custom_code->size() == 0)
+      return "(invalid custom)";
+    return custom_code->str();
+  }
+
+  circle::BuiltinOperator code = opcode->builtin_code();
+  return circle::EnumNameBuiltinOperator(code);
+}
+
 const char *tensor_name(const circle::TensorT &tensor)
 {
   static const char *kEmptyTensorName = "(noname)";
@@ -66,11 +104,30 @@ const char *tensor_name(const circle::TensorT &tensor)
   return kEmptyTensorName;
 }
 
+// Name of a tensor read through the direct API; "(noname)" when the name is absent or empty
+const char *tensor_name(const circle::Tensor *tensor)
+{
+  assert(tensor != nullptr);
+
+  static const char *kEmptyTensorName = "(noname)";
+  // name is optional in the schema: the accessor returns nullptr when the model omits it
+  const auto *name = tensor->name();
+  if (name == nullptr || name->c_str()[0] == '\0')
+    return kEmptyTensorName;
+  return name->c_str();
+}
+
 const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
 {
   return tensor.quantization.get();
 }
 
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor)
+{
+  assert(tensor != nullptr);     // direct-API counterpart of the TensorT overload
+  return tensor->quantization(); // forwards the accessor's result unchanged
+}
+
 loco::DataType luci_datatype(const circle::TensorType type)
 {
   switch (type)
@@ -235,6 +292,16 @@ luci_quantparam(const circle::QuantizationParametersT *quantization)
   return nullptr;
 }
 
+std::unique_ptr<CircleQuantParam> luci_quantparam(const circle::QuantizationParameters *qparams)
+{
+  assert(qparams != nullptr);
+
+  // Unpack into a temporary object-API value, then reuse the overload that converts it
+  circle::QuantizationParametersT unpacked;
+  qparams->UnPackTo(&unpacked);
+  return luci_quantparam(&unpacked);
+}
+
 std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParametersT *sparsity)
 {
   assert(sparsity);
@@ -257,6 +324,16 @@ std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParamete
   return sparsityparam;
 }
 
+std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParameters *sparparam)
+{
+  assert(sparparam != nullptr);
+
+  // Unpack into a temporary object-API value, then reuse the overload that converts it
+  circle::SparsityParametersT unpacked;
+  sparparam->UnPackTo(&unpacked);
+  return luci_sparsityparam(&unpacked);
+}
+
 void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
 {
   node->name(tensor_name(tensor));
@@ -292,6 +369,45 @@ void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
   }
 }
 
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
+{
+  assert(tensor != nullptr);
+
+  // Name and element type
+  node->name(tensor_name(tensor));
+  node->dtype(luci_datatype(tensor->type()));
+
+  // Shape (in NHWC): a shape_signature entry of -1 leaves that dimension unset
+  const auto signature = wrap(tensor->shape_signature());
+  const auto shape = wrap(tensor->shape());
+  assert(signature.empty() || signature.size() == shape.size());
+
+  const uint32_t rank = shape.size();
+  node->rank(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    const bool unknown = !signature.empty() && signature.at(axis) == -1;
+    if (unknown)
+      node->dim(axis).unset();
+    else
+      node->dim(axis).set(shape[axis]);
+  }
+
+  // Quantization parameters are attached only when present and convertible
+  if (const auto *quantization = tensor->quantization())
+  {
+    if (auto quantparam = luci_quantparam(quantization))
+      node->quantparam(std::move(quantparam));
+  }
+
+  // Same rule for sparsity parameters
+  if (const auto *sparsity = tensor->sparsity())
+  {
+    if (auto sparsityparam = luci_sparsityparam(sparsity))
+      node->sparsityparam(std::move(sparsityparam));
+  }
+}
+
 circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
 {
   const auto &op_codes = opcodes();
@@ -326,7 +442,7 @@ bool CircleReader::parse(const circle::Model *model)
   _model.reset(model->UnPack());
 
   // for direct pointer access
-  _model_ptr = model;
+  _native_model = model;
 
   return true;
 }
@@ -342,12 +458,72 @@ bool CircleReader::select_subgraph(uint32_t sgindex)
   _current_subgraph = _model->subgraphs[sgindex].get();
 
   // for direct pointer access
-  auto subgraphs = _model_ptr->subgraphs();
-  const circle::SubGraph *subgraph = (*subgraphs)[sgindex];
+  auto subgraphs = _native_model->subgraphs();
+  assert(subgraphs != nullptr);
+
+  _native_subgraph = subgraphs->Get(sgindex);
+  assert(_native_subgraph != nullptr);
 
-  _tensors_ptr = subgraph->tensors();
+  _tensors_ptr = _native_subgraph->tensors();
 
   return true;
 }
 
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+  // Nothing to do: the pointer is only stored, and it may be null
+}
+
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+  return null() ? 0 : _vector->size(); // a null vector counts as zero elements
+}
+
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+  return null() ? nullptr : _vector->data(); // no storage to point at when null
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+  return null() ? iterator(nullptr, 0) : _vector->begin(); // placeholder iterator when null
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+  return null() ? begin() : _vector->end(); // null: same as begin(), i.e. an empty range
+}
+
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+  if (i >= size()) // also rejects every index of a null vector, whose size() is 0
+  {
+    // TODO find better error message
+    throw std::range_error("Access to prohibited vector element");
+  }
+
+  return _vector->Get(i);
+}
+
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+  return at(i); // bounds-checked like at(), so it can throw std::range_error
+}
+
+template <typename T> bool VectorWrapper<T>::null() const { return _vector == nullptr; }
+template <typename T> bool VectorWrapper<T>::empty() const { return size() == 0; }
+
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T> // one explicit instantiation per T
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
 } // namespace luci
diff --git a/compiler/luci/import/src/CircleReader.test.cpp b/compiler/luci/import/src/CircleReader.test.cpp
new file mode 100644
index 0000000..0ce5b60
--- /dev/null
+++ b/compiler/luci/import/src/CircleReader.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/CircleReader.h"
+
+#include <gtest/gtest.h>
+
+TEST(VectorWrapperTest, basic_pattern)
+{
+  // Serialize a small vector and view it through the wrapper
+  const std::vector<int32_t> expected = {1, 4, 2, 0, 7};
+
+  flatbuffers::FlatBufferBuilder builder;
+  auto const offset = builder.CreateVector(expected.data(), expected.size());
+  auto const wrapper = luci::wrap(GetTemporaryPointer(builder, offset));
+
+  // Size and element order must match the source data
+  ASSERT_EQ(wrapper.size(), expected.size());
+  ASSERT_TRUE(std::equal(wrapper.begin(), wrapper.end(), expected.begin()));
+}
+
+TEST(VectorWrapperTest, wrong_data_NEG)
+{
+  std::vector<int32_t> values = {1, 4, 2, 0, 7};
+
+  // Serialize first; the wrapper views the builder's buffer, not `values`
+  flatbuffers::FlatBufferBuilder builder;
+  auto const offset = builder.CreateVector(values.data(), values.size());
+  auto const wrapper = luci::wrap(GetTemporaryPointer(builder, offset));
+
+  // Reverse the local vector so it no longer matches what was serialized
+  std::reverse(values.begin(), values.end());
+
+  // Same length, different element order
+  ASSERT_EQ(wrapper.size(), values.size());
+  ASSERT_FALSE(std::equal(wrapper.begin(), wrapper.end(), values.begin()));
+}
+
+TEST(VectorWrapperTest, null_pointer)
+{
+  // Wrapping a null vector is allowed and reads as both null and empty
+  const flatbuffers::Vector<int32_t> *missing = nullptr;
+  auto const wrapper = luci::wrap(missing);
+  ASSERT_TRUE(wrapper.null());
+  ASSERT_TRUE(wrapper.empty());
+}
+
+TEST(VectorWrapperTest, prohibited_access_NEG)
+{
+  // A null vector has no elements, so even index 0 must be rejected by at()
+  const flatbuffers::Vector<uint8_t> *missing = nullptr;
+  auto const wrapper = luci::wrap(missing);
+  ASSERT_ANY_THROW(wrapper.at(0));
+}
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
index 8eae5fc..3f7f785 100644
--- a/compiler/luci/import/src/Importer.cpp
+++ b/compiler/luci/import/src/Importer.cpp
@@ -119,6 +119,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
   }
 
   // Create CircleConst nodes for constant tensors.
+  // NOTE Origin is intentionally not provided for constants.
   for (uint32_t i = 0; i < tensors.size(); ++i)
   {
     luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp
index 8366546..d963b4d 100644
--- a/compiler/luci/import/src/Importer.test.cpp
+++ b/compiler/luci/import/src/Importer.test.cpp
@@ -16,9 +16,12 @@
 
 #include "luci/Importer.h"
 
-#include <loco.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 
 #include <gtest/gtest.h>
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
 
 TEST(TensorFlowLiteImport, Dummy)
 {
@@ -26,3 +29,283 @@ TEST(TensorFlowLiteImport, Dummy)
 
   SUCCEED();
 }
+
+// helpers for flatbuffers
+namespace
+{
+
+struct BasicCircleModel // builds an object-API circle model in memory for the import tests
+{
+  std::unique_ptr<circle::ModelT> model;
+  // Starts with one buffer already in place, so the first add_buffer() call returns index 1
+  BasicCircleModel()
+  {
+    model = std::make_unique<circle::ModelT>();
+    model->buffers.push_back(std::make_unique<circle::BufferT>());
+    model->description = "nnpackage";
+    model->version = 0;
+  }
+  // Appends an unnamed CHANNELS_LAST subgraph and returns its index
+  uint32_t add_subgraph()
+  {
+    model->subgraphs.push_back(std::make_unique<circle::SubGraphT>());
+    model->subgraphs.back()->name = "";
+    model->subgraphs.back()->data_format = circle::DataFormat_CHANNELS_LAST;
+    return model->subgraphs.size() - 1;
+  }
+  // Replaces the input tensor index list of the given subgraph
+  void add_subgraph_inputs(uint32_t subgraph_id, const std::vector<uint32_t> &inputs)
+  {
+    model->subgraphs[subgraph_id]->inputs.assign(inputs.begin(), inputs.end());
+  }
+  // Replaces the output tensor index list of the given subgraph
+  void add_subgraph_outputs(uint32_t subgraph_id, const std::vector<uint32_t> &outputs)
+  {
+    model->subgraphs[subgraph_id]->outputs.assign(outputs.begin(), outputs.end());
+  }
+  // Registers a builtin operator code with version 1 and returns its index
+  uint32_t add_builtin_opcode(circle::BuiltinOperator opcode)
+  {
+    uint32_t id = model->operator_codes.size();
+    model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>());
+    model->operator_codes[id]->builtin_code = opcode;
+    model->operator_codes[id]->version = 1;
+    return id;
+  }
+  // Appends an empty buffer and returns its index
+  uint32_t add_buffer()
+  {
+    model->buffers.push_back(std::make_unique<circle::BufferT>());
+    return model->buffers.size() - 1;
+  }
+  // Appends a FLOAT32 tensor named after its index, without quantization or sparsity
+  uint32_t add_float_tensor(uint32_t graph_id, const std::vector<int32_t> &shape,
+                            uint32_t buffer_id)
+  {
+    auto &graph = model->subgraphs[graph_id];
+    uint32_t idx = graph->tensors.size();
+    graph->tensors.push_back(std::make_unique<circle::TensorT>());
+    graph->tensors[idx]->shape = shape;
+    graph->tensors[idx]->type = circle::TensorType_FLOAT32;
+    graph->tensors[idx]->buffer = buffer_id;
+    graph->tensors[idx]->name = std::to_string(idx);
+    graph->tensors[idx]->quantization.reset(nullptr);
+    graph->tensors[idx]->is_variable = false;
+    graph->tensors[idx]->sparsity.reset(nullptr);
+    (void)graph->tensors[idx]->shape_signature; // no-op: field is left as constructed
+    return idx;
+  }
+  // Appends an operator with no builtin options to the subgraph and returns its index
+  uint32_t add_builtin_operator(uint32_t graph_id, uint32_t opcode_id,
+                                const std::vector<uint32_t> &inputs,
+                                const std::vector<uint32_t> &outputs)
+  {
+    auto &graph = model->subgraphs[graph_id];
+    auto idx = graph->operators.size();
+    graph->operators.push_back(std::make_unique<circle::OperatorT>());
+    graph->operators[idx]->opcode_index = opcode_id;
+    graph->operators[idx]->inputs.assign(inputs.begin(), inputs.end());
+    graph->operators[idx]->outputs.assign(outputs.begin(), outputs.end());
+    graph->operators[idx]->builtin_options.Reset();
+    (void)graph->operators[idx]->custom_options; // no-op: field is left as constructed
+    graph->operators[idx]->custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
+    (void)graph->operators[idx]->mutating_variable_inputs; // no-op, as above
+    (void)graph->operators[idx]->intermediates;            // no-op, as above
+    return idx;
+  }
+  // Adds the plan-table metadata entry and resets its buffer to a 4-byte entry count of 0
+  uint32_t add_plan_metadata(uint32_t buffer_id)
+  {
+    static_assert(sizeof(uint32_t) == 4, "metadata is stored in blocks of 32 bit unsiged ints");
+    uint32_t idx = model->metadata.size();
+    model->metadata.push_back(std::make_unique<circle::MetadataT>());
+    model->metadata[idx]->name = "ONE_execution_plan_table";
+    model->metadata[idx]->buffer = buffer_id;
+    model->buffers[buffer_id]->data.resize(4);
+    auto &entries_count = *reinterpret_cast<uint32_t *>(model->buffers[buffer_id]->data.data());
+    entries_count = 0;
+    return idx;
+  }
+  // Appends one entry to the plan table and bumps the entry count stored in its first word
+  void add_plan_entry(uint32_t plan_buffer_id, uint32_t execution_order,
+                      const std::vector<uint32_t> &offsets)
+  {
+    auto &buffer = model->buffers[plan_buffer_id]->data;
+    auto old_size = buffer.size();
+    assert(old_size % 4 == 0); // table is a sequence of 32-bit words
+    assert(old_size > 0);      // add_plan_metadata() must have written the count word first
+
+    // Allocate space for new entry:
+    // 4 bytes for entry id
+    // 4 bytes for entry size
+    // 4 bytes for execution order
+    // offsets.size() * 4 bytes for offsets
+    buffer.resize(old_size + 12 + offsets.size() * 4);
+    uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+    *number_of_entries_ptr += 1;
+
+    uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+    entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id (position in the table)
+    entry_data_ptr[1] = 1 + offsets.size();         // entry size in words: order + offsets
+    entry_data_ptr[2] = execution_order;            // execution order
+    std::copy(offsets.begin(), offsets.end(), entry_data_ptr + 3);
+  }
+};
+
+struct SimpleRELUModel : public BasicCircleModel
+{
+  // One subgraph: float input [1, 2, 3, 4] -> RELU -> float output [1, 2, 3, 4]
+  SimpleRELUModel()
+  {
+    const uint32_t relu = add_builtin_opcode(circle::BuiltinOperator_RELU);
+    const uint32_t graph = add_subgraph();
+    const std::vector<int32_t> shape = {1, 2, 3, 4};
+
+    // Each tensor gets a buffer of its own
+    const uint32_t in = add_float_tensor(graph, shape, add_buffer());
+    const uint32_t out = add_float_tensor(graph, shape, add_buffer());
+
+    add_subgraph_inputs(graph, {in});
+    add_subgraph_outputs(graph, {out});
+
+    // The single operator reads the input tensor and writes the output tensor
+    add_builtin_operator(graph, relu, {in}, {out});
+  }
+};
+
+} // namespace
+
+/**
+ * This test checks that a one-op RELU model with an execution plan is successfully imported
+ */
+TEST(TensorFlowLiteImport, simple_plan)
+{
+  SimpleRELUModel model;
+  auto metadata_buffer_id = model.add_buffer();
+  model.add_plan_metadata(metadata_buffer_id);
+  // Three plan entries (ids 0..2), each with an execution order and a single offset
+  model.add_plan_entry(metadata_buffer_id, 1, {100});
+  model.add_plan_entry(metadata_buffer_id, 2, {300});
+  model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+  flatbuffers::FlatBufferBuilder fbb;
+  auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, model_offset);
+
+  auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+
+  auto luci_module = import.importModule(model_ptr);
+  ASSERT_TRUE(luci_module != nullptr); // stop here rather than dereference a failed import
+  auto main_graph = luci_module->graph();
+  for (uint32_t i = 0; i < main_graph->nodes()->size(); ++i)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+    switch (node->opcode())
+    {
+      case luci::CircleOpcode::CIRCLEINPUT:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 1);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 100);
+        break;
+      }
+      case luci::CircleOpcode::CIRCLEOUTPUT:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 3);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 200);
+        break;
+      }
+      case luci::CircleOpcode::RELU:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 2);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 300);
+        break;
+      }
+      default:
+        FAIL();
+    }
+  }
+}
+
+/**
+ * This test checks that a model with an incomplete execution plan is successfully imported
+ */
+TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
+{
+  SimpleRELUModel model;
+  auto metadata_buffer_id = model.add_buffer();
+  model.add_plan_metadata(metadata_buffer_id);
+  // Only one plan entry (id 0) is provided; the other nodes are left without a plan
+  model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+  flatbuffers::FlatBufferBuilder fbb;
+  auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, model_offset);
+
+  auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+
+  auto luci_module = import.importModule(model_ptr);
+  ASSERT_TRUE(luci_module != nullptr); // stop here rather than dereference a failed import
+  auto main_graph = luci_module->graph();
+  for (uint32_t i = 0; i < main_graph->nodes()->size(); ++i)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+    switch (node->opcode())
+    {
+      case luci::CircleOpcode::CIRCLEINPUT:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 1);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 100);
+        break;
+      }
+      case luci::CircleOpcode::CIRCLEOUTPUT:
+      case luci::CircleOpcode::RELU:
+      {
+        ASSERT_FALSE(luci::has_execution_plan(node));
+        break;
+      }
+      default:
+        FAIL();
+    }
+  }
+}
+
+/**
+ * This test checks that a corrupted execution plan induces an exception
+ */
+TEST(TensorFlowLiteImport, corrupted_plan_NEG)
+{
+  SimpleRELUModel model;
+  const auto plan_buffer_id = model.add_buffer();
+  model.add_plan_metadata(plan_buffer_id);
+
+  model.add_plan_entry(plan_buffer_id, 1, {100});
+  model.add_plan_entry(plan_buffer_id, 2, {300});
+  model.add_plan_entry(plan_buffer_id, 3, {200});
+
+  // Corrupt the table: claim 4 entries in the leading count while only 3 are stored
+  auto &plan_bytes = model.model->buffers[plan_buffer_id]->data;
+  *reinterpret_cast<uint32_t *>(plan_bytes.data()) = 4;
+
+  flatbuffers::FlatBufferBuilder fbb;
+  const auto packed = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, packed);
+
+  const auto *model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+  ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt
index 433b7cd..2f6ee23 100644
--- a/compiler/luci/lang/CMakeLists.txt
+++ b/compiler/luci/lang/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_lang ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_lang PRIVATE src)
 target_include_directories(luci_lang PUBLIC include)
 target_link_libraries(luci_lang PUBLIC loco)
diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt
index b64a065..bbd733f 100644
--- a/compiler/luci/log/CMakeLists.txt
+++ b/compiler/luci/log/CMakeLists.txt
@@ -1,11 +1,11 @@
 # TODO Find how to test logging framework
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
-if (NOT LIBRARY_TYPE)
-    set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+    set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_log ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_log ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_log PUBLIC include)
 target_link_libraries(luci_log PUBLIC hermes)
 target_link_libraries(luci_log PRIVATE hermes_std)
diff --git a/compiler/luci/log/include/luci/Log.h b/compiler/luci/log/include/luci/Log.h
index e148810..ddb34f4 100644
--- a/compiler/luci/log/include/luci/Log.h
+++ b/compiler/luci/log/include/luci/Log.h
@@ -48,7 +48,6 @@ public:
 
 private:
   bool _show_warn = true;
-  bool _show_info = false;
   int _show_verbose = 0;
 };
 
@@ -67,8 +66,8 @@ private:
 #define LOGGER(name) ::luci::Logger name{::luci::LoggingContext::get()};
 
 // TODO Support FATAL, ERROR
-#define INFO(name) HERMES_INFO(name)
-#define WARN(name) HERMES_WARN(name)
+#define INFO(name) HERMES_VERBOSE(name, 3)
+#define WARN(name) HERMES_VERBOSE(name, 2)
 #define VERBOSE(name, lv) HERMES_VERBOSE(name, lv)
 
 // WARNING!
diff --git a/compiler/luci/log/src/Log.cpp b/compiler/luci/log/src/Log.cpp
index c26bf30..0cc45e8 100644
--- a/compiler/luci/log/src/Log.cpp
+++ b/compiler/luci/log/src/Log.cpp
@@ -33,11 +33,6 @@ namespace
  */
 template <typename T> T safecast(const char *, const T &);
 
-template <> bool safecast<bool>(const char *s, const bool &value)
-{
-  return (s == nullptr) ? value : (std::stoi(s) != 0);
-}
-
 template <> int safecast<int>(const char *s, const int &value)
 {
   return (s == nullptr) ? value : std::stoi(s);
@@ -68,9 +63,6 @@ LoggerConfig::LoggerConfig()
 
   _show_warn = !settings->get(luci::UserSettings::Key::MuteWarnings);
 
-  // Turn on info logging if LUCI_LOG is set as non-zero value
-  _show_info = safecast<bool>(std::getenv("LUCI_LOG"), false);
-
   // Turn on verbose logging if LUCI_LOG is set to some level
   // VERBOSE(l, 1) will be visible with LUCI_LOG=2 and VERBOSE(l, 2) with LUCI_LOG=3 and so on
   _show_verbose = safecast<int>(std::getenv("LUCI_LOG"), 0);
@@ -87,6 +79,8 @@ void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setti
 
 void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
 {
+  // TODO remove deprecated codes
+#if 0
   setting.filter(hermes::SeverityCategory::FATAL).reject_all();
   setting.filter(hermes::SeverityCategory::ERROR).reject_all();
   setting.filter(hermes::SeverityCategory::WARN).reject_all();
@@ -106,6 +100,16 @@ void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) c
   {
     setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
   }
+#endif
+  setting.reject_all();
+  setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose);
+  setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose);
+  if (_show_warn)
+  {
+    setting.filter(hermes::SeverityCategory::WARN).accept_upto(_show_verbose);
+  }
+  setting.filter(hermes::SeverityCategory::INFO).accept_upto(_show_verbose);
+  setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
 }
 
 } // namespace luci
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
index 4d801b0..aed9fb7 100644
--- a/compiler/luci/logex/CMakeLists.txt
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -1,11 +1,11 @@
 # TODO Find how to test logging-ex utility
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
-if (NOT LIBRARY_TYPE)
-    set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+    set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_logex ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_logex PUBLIC include)
 target_link_libraries(luci_logex PUBLIC loco)
 target_link_libraries(luci_logex PUBLIC locop)
diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt
index eacbe1c..ec8e0b0 100644
--- a/compiler/luci/partition/CMakeLists.txt
+++ b/compiler/luci/partition/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_partition ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_partition ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_partition PRIVATE src)
 target_include_directories(luci_partition PUBLIC include)
 target_link_libraries(luci_partition PUBLIC luci_lang)
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
index b767c77..c517bf9 100644
--- a/compiler/luci/partition/src/PartitionMerge.cpp
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -58,6 +58,9 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
     //         we need to clone this CircleConst for each graph of the group.
     if (dynamic_cast<const luci::CircleConst *>(input) != nullptr)
       continue;
+    // Skip also for OutputExclude
+    if (dynamic_cast<const luci::CircleOutputExclude *>(input) != nullptr)
+      continue;
 
     auto input_group = pgroups->group_of(input);
     // NOTE: all the nodes should be registered and return should be valid group.
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
index e0b4e8e..0080873 100644
--- a/compiler/luci/partition/src/PartitionPGroups.cpp
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -35,6 +35,17 @@ class IsVirtualNode final : public luci::CircleNodeVisitor<bool>
 public:
   bool visit(const luci::CircleInput *) final { return true; }
   bool visit(const luci::CircleOutput *) final { return true; }
+  // For multiple outputs
+  bool visit(const luci::CircleCustomOut *) final { return true; }
+  bool visit(const luci::CircleIfOut *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV5Out *) final { return true; }
+  bool visit(const luci::CircleSplitOut *) final { return true; }
+  bool visit(const luci::CircleSplitVOut *) final { return true; }
+  bool visit(const luci::CircleTopKV2Out *) final { return true; }
+  bool visit(const luci::CircleUniqueOut *) final { return true; }
+  bool visit(const luci::CircleUnpackOut *) final { return true; }
+  bool visit(const luci::CircleWhileOut *) final { return true; }
   // TODO add all virtual nodes
 
   // default is false
@@ -58,6 +69,91 @@ bool check_allocate_partition(const luci::CircleNode *node)
   return true;
 }
 
+class FindGroupToFollow final : public luci::CircleNodeVisitor<const std::string &>
+{
+public:
+  FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups)
+    : _partition(partition), _pgroups(pgroups)
+  {
+    // NOTHING TO DO
+  }
+private:
+  // Group recorded for 'input' in node2group; the partition default group if none is recorded
+  const std::string &groupof(const luci::CircleNode *input) const
+  {
+    const auto &group = _pgroups->node2group[input]; // bind once: no copy, no second lookup
+    assert(not group.empty());
+    if (group.empty())
+      return _partition.default_group; // fallback for builds where the assert is compiled out
+    return group;
+  }
+
+public:
+#define IMPLEMENT(CLASS)                                             \
+  const std::string &visit(const luci::CLASS *node) final            \
+  {                                                                  \
+    auto input = loco::must_cast<luci::CircleNode *>(node->input()); \
+    return groupof(input);                                           \
+  }
+  // Multiple-output virtual nodes: each one follows the group of its input() node
+  IMPLEMENT(CircleCustomOut);
+  IMPLEMENT(CircleIfOut);
+  IMPLEMENT(CircleNonMaxSuppressionV4Out);
+  IMPLEMENT(CircleNonMaxSuppressionV5Out);
+  IMPLEMENT(CircleSplitOut);
+  IMPLEMENT(CircleSplitVOut);
+  IMPLEMENT(CircleTopKV2Out);
+  IMPLEMENT(CircleUniqueOut);
+  IMPLEMENT(CircleUnpackOut);
+  IMPLEMENT(CircleWhileOut);
+
+#undef IMPLEMENT
+
+  // any other node: return an empty string, meaning there is no group to follow
+  const std::string &visit(const luci::CircleNode *) final { return _empty_str; }
+
+private:
+  const luci::PartitionTable &_partition; // supplies default_group for the fallback
+  luci::PGroups *_pgroups = nullptr;      // provides the node2group table
+  std::string _empty_str;                 // returned by reference from the catch-all visit()
+};
+
+} // namespace
+
+namespace
+{
+
+void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx)
+{
+  // Build a PGroup that holds just this node; its id is the node index shifted by one
+  auto new_group = std::make_unique<luci::PGroup>();
+  auto *raw_group = new_group.get();
+  raw_group->group = group;
+  raw_group->id = idx + 1;
+
+  auto member = std::make_unique<luci::PNode>();
+  member->node = node;
+  member->group = group;
+  member->pgroup = raw_group;
+  raw_group->pnodes.push_back(std::move(member));
+
+  // Every argument of the node becomes an input of the PGroup
+  // NOTE an argument may be a CircleInput of the source graph; safety of this is unconfirmed
+  for (uint32_t arg_idx = 0; arg_idx < node->arity(); ++arg_idx)
+  {
+    auto *arg = loco::must_cast<luci::CircleNode *>(node->arg(arg_idx));
+    raw_group->inputs.push_back(arg);
+  }
+  // The only output recorded is the node itself
+  // TODO support multiple virtual outputs
+  raw_group->outputs.push_back(node);
+
+  // Register in both lookup tables, then hand ownership over to 'pgroups'
+  pgroups->node2group[node] = group;
+  pgroups->id2pgroup[raw_group->id] = raw_group;
+  pgroups->pgroups.push_back(std::move(new_group));
+}
+
 } // namespace
 
 namespace luci
@@ -120,6 +216,8 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
       INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
               << std::endl;
 
+      append(node, pgroups.get(), group, idx);
+#if 0
       auto pgroup = std::make_unique<luci::PGroup>();
       pgroup->group = group;
       pgroup->id = idx + 1;
@@ -147,6 +245,7 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
       pgroups->id2pgroup[pgroup->id] = pgroup.get();
 
       pgroups->pgroups.push_back(std::move(pgroup));
+#endif
     }
     else
     {
@@ -156,6 +255,22 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
     }
   }
 
+  // handle for virtual nodes like multiple outputs
+  // these nodes should follow group of the input
+  for (uint32_t idx = 0; idx < nodes->size(); ++idx)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
+
+    // virtual nodes like CircleUnpackOut should follow the group of their input (owner)
+    // or just be set to default
+    FindGroupToFollow query(partition, pgroups.get());
+    const auto &group = node->accept(&query);
+    if (not group.empty())
+    {
+      append(node, pgroups.get(), group, idx);
+    }
+  }
+
   return std::move(pgroups);
 }
 
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index 2361bb4..b8b406a 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
 if(NOT FlatBuffers_FOUND)
   message(STATUS "FlatBuffers NOT FOUND")
   return()
@@ -8,11 +8,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_pass PRIVATE src)
 target_include_directories(luci_pass PUBLIC include)
 target_link_libraries(luci_pass PUBLIC loco)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 917caca..658563e 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -93,6 +93,8 @@ public:
       Quantize_tensor_names,
       Quantize_scales,
       Quantize_zero_points,
+      Quantize_input_type,
+      Quantize_output_type,
 
       // sparsify
       Sparsify_tensor_name,
@@ -104,9 +106,6 @@ public:
       // convert NCHW to NHWC
       NCHW_to_NHWC_input_shape,
       NCHW_to_NHWC_output_shape,
-
-      Quantize_input_dtype = Quantize_input_model_dtype,   // TODO Remove this
-      Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this
     };
 
     virtual ~Options() = default;
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
index d618a07..648abad 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
@@ -31,11 +31,23 @@ namespace luci
  */
 class QuantizeWithMinMaxPass : public logo::Pass
 {
+  // For backward-compatibility
+  // TODO Remove this constructor
 public:
   QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
                          QuantizationGranularity granularity)
-    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
-                                                                                        granularity}
+    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
+      _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype}
+  {
+    // DO NOTHING
+  }
+
+public:
+  QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
+                         QuantizationGranularity granularity, loco::DataType input_type,
+                         loco::DataType output_type)
+    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
+      _granularity{granularity}, _input_type{input_type}, _output_type{output_type}
   {
     // DO NOTHING
   }
@@ -45,9 +57,15 @@ public:
   bool run(loco::Graph *graph);
 
 private:
+  void set_input_type(loco::Graph *graph) const;
+  void set_output_type(loco::Graph *graph) const;
+
+private:
   loco::DataType _input_model_dtype;
   loco::DataType _output_model_dtype;
   QuantizationGranularity _granularity;
+  loco::DataType _input_type;
+  loco::DataType _output_type;
 };
 
 } // namespace luci
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 5d0c926..75f04b3 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -468,12 +468,20 @@ void CircleOptimizer::quantize(loco::Graph *g) const
     static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
     static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
     static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
+    static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
+    static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
 
     auto input_model_dtype =
       _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
     auto output_model_dtype =
       _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
     auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+    auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
+    if (input_type.empty())
+      input_type = output_model_dtype;
+    auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
+    if (output_type.empty())
+      output_type = output_model_dtype;
 
     if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
       throw std::runtime_error("Unsupported input type. List of supported input types: " +
@@ -487,13 +495,21 @@ void CircleOptimizer::quantize(loco::Graph *g) const
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(qwmm_supported_granularity));
 
+    if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(qwmm_supported_input_type));
+
+    if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(qwmm_supported_output_type));
+
     if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
         str_to_dtype(output_model_dtype) != loco::DataType::U8)
       throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
 
-    luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype),
-                                           str_to_dtype(output_model_dtype),
-                                           str_to_granularity(granularity));
+    luci::QuantizeWithMinMaxPass quantizer(
+      str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype),
+      str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type));
     quantizer.run(g);
 
     // Post-quantization optimizations
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
index 66e3415..d83973c 100644
--- a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
@@ -72,13 +72,6 @@ bool fuse_activation_function(luci::CircleNode *node)
     else
       return false;
   }
-  else if (opcode == luci::CircleOpcode::TANH)
-  {
-    if (fused_act == luci::FusedActFunc::NONE)
-      target_func = luci::FusedActFunc::TANH;
-    else
-      return false;
-  }
   else
     return false;
 
@@ -98,8 +91,9 @@ bool FuseActivationFunctionPass::run(loco::Graph *g)
   {
     auto circle_node = static_cast<luci::CircleNode *>(node);
     auto opcode = circle_node->opcode();
+    // TANH is not supported as CONV fused with TANH is not supported in luci-interpreter
     if (opcode == luci::CircleOpcode::RELU || opcode == luci::CircleOpcode::RELU6 ||
-        opcode == luci::CircleOpcode::RELU_N1_TO_1 || opcode == luci::CircleOpcode::TANH)
+        opcode == luci::CircleOpcode::RELU_N1_TO_1)
     {
       if (fuse_activation_function(circle_node))
         changed = true;
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
index 56b4141..9e0a80d 100644
--- a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
@@ -86,6 +86,47 @@ protected:
   luci::CircleConst *_conv2_b = nullptr;
 };
 
+class ConvTanhConvGraphlet // creates (without connecting) two Conv2D, one Tanh, four Const nodes
+{
+public:
+  ConvTanhConvGraphlet() = default;
+
+  void init(loco::Graph *g) // only creates and names the nodes; connecting them is left to users
+  {
+    _conv1 = g->nodes()->create<luci::CircleConv2D>();
+    _conv2 = g->nodes()->create<luci::CircleConv2D>();
+    _tanh = g->nodes()->create<luci::CircleTanh>();
+    _conv1_f = g->nodes()->create<luci::CircleConst>();
+    _conv1_b = g->nodes()->create<luci::CircleConst>();
+    _conv2_f = g->nodes()->create<luci::CircleConst>();
+    _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+    _conv1->fusedActivationFunction(luci::FusedActFunc::NONE); // set on conv1 only
+
+    _conv1->name("conv1");
+    _conv2->name("conv2");
+    _tanh->name("tanh");
+    _conv1_f->name("conv1f");
+    _conv1_b->name("conv1b");
+    _conv2_f->name("conv2f");
+    _conv2_b->name("conv2b");
+  }
+
+public:
+  luci::CircleTanh *tanh() { return _tanh; }
+  luci::CircleConv2D *conv1() { return _conv1; }
+  luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+  luci::CircleConv2D *_conv1 = nullptr; // all members stay nullptr until init() is called
+  luci::CircleConv2D *_conv2 = nullptr;
+  luci::CircleTanh *_tanh = nullptr;
+  luci::CircleConst *_conv1_f = nullptr;
+  luci::CircleConst *_conv1_b = nullptr;
+  luci::CircleConst *_conv2_f = nullptr;
+  luci::CircleConst *_conv2_b = nullptr;
+};
+
 class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet
 {
 public:
@@ -110,6 +151,30 @@ public:
   }
 };
 
+class FuseTanhActTestGraph : public TestIOGraph, public ConvTanhConvGraphlet
+{
+public:
+  FuseTanhActTestGraph() = default;
+
+  void init(void) // wires input -> conv1 -> tanh -> conv2 -> output
+  {
+    TestIOGraph::init({1}, {1}); // presumably the input and output shapes - TODO confirm
+    ConvTanhConvGraphlet::init(g());
+
+    _conv1->input(input());
+    _conv1->filter(_conv1_f);
+    _conv1->bias(_conv1_b);
+
+    _tanh->x(_conv1); // tanh consumes conv1, making it the fusion candidate under test
+
+    _conv2->input(_tanh);
+    _conv2->filter(_conv2_f);
+    _conv2->bias(_conv2_b);
+
+    output()->from(_conv2);
+  }
+};
+
 class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet
 {
 public:
@@ -190,3 +255,15 @@ TEST(FusePreActivationBatchNorm, fuse_activation_function_tanh_NEG)
   // Relu input Conv2D already has activation function
   EXPECT_FALSE(pass.run(g.g()));
 }
+
+TEST(FusePreActivationBatchNorm, fuse_tanh_NEG)
+{
+  FuseTanhActTestGraph g;
+  luci::FuseActivationFunctionPass pass;
+
+  g.init();
+
+  // Tanh should not be fused, so the pass is expected to return false
+  // This can be changed when CONV+TANH is supported by luci-interpreter
+  EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index be81732..c3552ec 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -20,6 +20,7 @@
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
 #include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
 #include <luci/Log.h>
 
 #include <oops/UserExn.h>
@@ -63,6 +64,52 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc
   }
 }
 
+// Build a CircleQuantize node in the graph of 'node':
+//  - dtype: out_type
+//  - shape: copied from 'node'
+//  - qparam: scale/zero-point derived from the min/max recorded on 'node'
+luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
+{
+  auto quant_node = node->graph()->nodes()->create<CircleQuantize>();
+  quant_node->name(node->name() + "_Quantize");
+  quant_node->dtype(out_type);
+
+  // Mirror the shape of 'node'
+  quant_node->rank(node->rank());
+  for (uint32_t axis = 0; axis < node->rank(); ++axis)
+    quant_node->dim(axis).set(node->dim(axis).value());
+  quant_node->shape_status(luci::ShapeStatus::VALID);
+
+  // Exactly one min and one max value must already be recorded on 'node'
+  auto src_qparam = node->quantparam();
+  assert(src_qparam);                  // FIX_CALLER_UNLESS
+  assert(src_qparam->min.size() == 1); // FIX_CALLER_UNLESS
+  assert(src_qparam->max.size() == 1); // FIX_CALLER_UNLESS
+  auto range_min = src_qparam->min[0];
+  auto range_max = src_qparam->max[0];
+
+  // Outputs of the scale/zero-point computation (the nudged range is not used afterwards)
+  float scale{0};
+  int64_t zero_point{0};
+  float nudged_min{0};
+  float nudged_max{0};
+
+  const bool asymmetric = (out_type == loco::DataType::U8);
+  // Anything other than U8 is required to be S16
+  assert(asymmetric || out_type == loco::DataType::S16);
+  if (asymmetric)
+    compute_asym_scale_zp(range_min, range_max, scale, zero_point, nudged_min, nudged_max);
+  else
+    compute_sym_scale_zp(range_min, range_max, scale, zero_point, nudged_min, nudged_max);
+
+  auto new_qparam = std::make_unique<CircleQuantParam>();
+  new_qparam->scale.push_back(scale);
+  new_qparam->zerop.push_back(zero_point);
+  quant_node->quantparam(std::move(new_qparam));
+
+  return quant_node;
+}
+
 } // namespace
 
 namespace luci
@@ -743,8 +790,6 @@ struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
           scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor);
         }
 
-        circle_node->quantparam()->min.clear();
-        circle_node->quantparam()->max.clear();
         circle_node->quantparam()->scale.push_back(scaling_factor);
         circle_node->quantparam()->zerop.push_back(zp);
       }
@@ -1467,6 +1512,97 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant
   quant_input(&CirclePadV2::constant_values, 2);
 }
 
+void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const // retype inputs to _input_type
+{
+  auto inputs = g->inputs();
+  for (auto node : loco::input_nodes(g))
+  {
+    auto input = loco::must_cast<luci::CircleInput *>(node);
+    if (input->dtype() == _input_type) // already has the requested type
+      continue;
+
+    // Bool type is not quantizable
+    if (input->dtype() == loco::DataType::BOOL)
+      continue;
+
+    // Insert Quantize Op producing the dtype the input has had so far
+    auto quant_op = create_quantize_op(input, input->dtype());
+    loco::replace(input).with(quant_op);
+    quant_op->input(input); // order matters: presumably must follow replace() - confirm
+
+    // TODO Set a proper origin (Quantize should have its own Origin)
+    {
+      auto succs = loco::succs(quant_op);
+      assert(succs.size() > 0);
+      auto succ = loco::must_cast<luci::CircleNode *>(*succs.begin());
+      luci::add_origin(quant_op, luci::get_origin(succ)); // borrow a successor's origin
+    }
+
+    // Requantize input: derive scale/zero-point for _input_type from the recorded min/max
+    {
+      auto quantparam = input->quantparam();
+      assert(quantparam);
+      assert(quantparam->min.size() == 1); // only support layer-wise quant
+      assert(quantparam->max.size() == 1); // only support layer-wise quant
+      auto min = quantparam->min[0];
+      auto max = quantparam->max[0];
+
+      float scaling_factor{0};
+      int64_t zp{0};
+      float nudged_min{0};
+      float nudged_max{0};
+
+      if (_input_type == loco::DataType::U8)
+      {
+        compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      }
+      else
+      {
+        assert(_input_type == loco::DataType::S16);
+        compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      }
+      input->dtype(_input_type);
+      input->quantparam()->scale[0] = scaling_factor; // overwrite in place; entry [0] must exist
+      input->quantparam()->zerop[0] = zp;
+    }
+
+    auto graph_input = inputs->at(input->index());
+    graph_input->dtype(_input_type); // keep the graph-level input dtype in sync with the node
+  }
+}
+
+void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const // make outputs _output_type
+{
+  auto outputs = g->outputs();
+  for (auto node : loco::output_nodes(g))
+  {
+    auto output = loco::must_cast<luci::CircleOutput *>(node);
+    if (output->dtype() == _output_type) // already has the requested type
+      continue;
+
+    // Bool type is not quantizable
+    if (output->dtype() == loco::DataType::BOOL)
+      continue;
+
+    auto from = loco::must_cast<luci::CircleNode *>(output->from());
+
+    // The last Op is not quantizable Op (ex: ArgMax)
+    if (not from->quantparam())
+      continue;
+
+    // Insert Quantize Op. NOTE(review): replace() likely rewires all users of 'from'; confirm
+    auto quant_op = create_quantize_op(from, _output_type);
+    loco::replace(from).with(quant_op);
+    quant_op->input(from); // order matters: presumably must follow replace() - confirm
+
+    // TODO Set a proper origin (Quantize should have its own Origin)
+    luci::add_origin(quant_op, luci::get_origin(from));
+
+    auto graph_output = outputs->at(output->index());
+    graph_output->dtype(_output_type); // record the new dtype on the graph-level output
+  }
+}
+
 bool QuantizeWithMinMaxPass::run(loco::Graph *g)
 {
   LOGGER(l);
@@ -1539,6 +1675,23 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
     }
   }
 
+  // Set input type
+  set_input_type(g);
+
+  // Set output type
+  set_output_type(g);
+
+  // Remove min/max values
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    if (auto qparam = circle_node->quantparam())
+    {
+      qparam->min.clear();
+      qparam->max.clear();
+    }
+  }
+
   INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
   return false; // one time run
 }
diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt
index 9ca6dcb..d4c8f63 100644
--- a/compiler/luci/plan/CMakeLists.txt
+++ b/compiler/luci/plan/CMakeLists.txt
@@ -1,10 +1,12 @@
 file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-    set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+    set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_plan ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_plan PRIVATE src)
 target_include_directories(luci_plan PUBLIC include)
 target_link_libraries(luci_plan PUBLIC loco)
@@ -13,3 +15,12 @@ target_link_libraries(luci_plan PUBLIC luci_lang)
 install(TARGETS luci_plan DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+    return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_plan_test ${TESTS})
+target_link_libraries(luci_plan_test luci_plan)
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
new file mode 100644
index 0000000..d7ccf25
--- /dev/null
+++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+TEST(CircleNodeExecutionPlan, basic_fields)
+{
+  luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7}); // (order_in_plan, offsets)
+
+  ASSERT_EQ(plan.order_in_plan(), 123);
+  ASSERT_THAT(plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+
+  plan.order_in_plan(321); // setters overwrite the values given at construction
+  plan.offsets({1, 2, 3, 4});
+
+  ASSERT_EQ(plan.order_in_plan(), 321);
+  ASSERT_THAT(plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
+
+TEST(CircleNodeExecutionPlan, add_extract_plan)
+{
+  auto g = loco::make_graph();
+  auto add = g->nodes()->create<luci::CircleAdd>();
+
+  ASSERT_FALSE(luci::has_execution_plan(add)); // a freshly created node carries no plan
+
+  luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7});
+  luci::add_execution_plan(add, plan); // attach the plan to the node
+
+  ASSERT_TRUE(luci::has_execution_plan(add));
+
+  auto extracted_plan = luci::get_execution_plan(add);
+
+  ASSERT_EQ(extracted_plan.order_in_plan(), 123); // the values read back match what was attached
+  ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+}
+
+TEST(CircleNodeExecutionPlan, extract_plan_NEG)
+{
+  auto g = loco::make_graph();
+  auto add = g->nodes()->create<luci::CircleAdd>();
+
+  ASSERT_FALSE(luci::has_execution_plan(add));
+
+  ASSERT_ANY_THROW(luci::get_execution_plan(add)); // no plan attached: extraction must throw
+}
+
+TEST(CircleNodeExecutionPlan, double_set_plan_NEG)
+{
+  auto g = loco::make_graph();
+  auto add = g->nodes()->create<luci::CircleAdd>();
+
+  ASSERT_FALSE(luci::has_execution_plan(add));
+
+  luci::CircleNodeExecutionPlan plan1(123, {4, 5, 6, 7});
+  luci::add_execution_plan(add, plan1);
+  ASSERT_TRUE(luci::has_execution_plan(add));
+
+  luci::CircleNodeExecutionPlan plan2(321, {1, 2, 3, 4});
+  luci::add_execution_plan(add, plan2); // adding a second time is expected to replace plan1
+  ASSERT_TRUE(luci::has_execution_plan(add));
+
+  auto extracted_plan = luci::get_execution_plan(add);
+  ASSERT_EQ(extracted_plan.order_in_plan(), 321); // the values of plan2 are the ones read back
+  ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt
index ae604ab..f8a0cc0 100644
--- a/compiler/luci/profile/CMakeLists.txt
+++ b/compiler/luci/profile/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_profile ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_profile PRIVATE src)
 target_include_directories(luci_profile PUBLIC include)
 target_link_libraries(luci_profile PUBLIC loco)
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
index f48210b..0e6097f 100644
--- a/compiler/luci/service/CMakeLists.txt
+++ b/compiler/luci/service/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-if (NOT LIBRARY_TYPE)
-  set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+  set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
 
-add_library(luci_service ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_service PRIVATE src)
 target_include_directories(luci_service PUBLIC include)
 target_link_libraries(luci_service PUBLIC luci_lang)
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
index 79f3230..90dfa77 100644
--- a/compiler/one-cmds/one-build
+++ b/compiler/one-cmds/one-build
@@ -38,6 +38,16 @@ def _get_parser():
 
     _utils._add_default_arg(parser)
 
+    opt_name_list = _utils._get_optimization_list(get_name=True)
+    opt_name_list = ['-' + s for s in opt_name_list]
+    if not opt_name_list:
+        opt_help_message = '(No available optimization options)'
+    else:
+        opt_help_message = '(Available optimization options: ' + ', '.join(
+            opt_name_list) + ')'
+    opt_help_message = 'optimization name to use ' + opt_help_message
+    parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
     return parser
 
 
@@ -55,6 +65,14 @@ def _verify_arg(parser, args):
     # check if required arguments is given
     if not _utils._is_valid_attr(args, 'config'):
         parser.error('-C/--config argument is required')
+    # check if given optimization option exists
+    opt_name_list = _utils._get_optimization_list(get_name=True)
+    opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
+    if _utils._is_valid_attr(args, 'O'):
+        if ' ' in getattr(args, 'O'):
+            parser.error('Not allowed to have space in the optimization name')
+        if not getattr(args, 'O') in opt_name_list:
+            parser.error('Invalid optimization option')
 
 
 def _get_driver_name(driver_name):
@@ -101,6 +119,27 @@ def _verify_cfg(driver_list, config):
         raise AssertionError('Only one import-* driver can be executed')
 
 
+# verify given optimization option file
+def _verify_opt(args):
+    if _utils._is_valid_attr(args, 'O'):
+        config = configparser.ConfigParser()
+        config.optionxform = str
+        opt_name_path_dic = dict(
+            zip(_utils._get_optimization_list(get_name=True),
+                _utils._get_optimization_list()))
+        parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')])
+        # check if given optimization option file exists
+        if not parsed:
+            raise FileNotFoundError('Not found given optimization configuration file')
+        # check if given optimization option file only has `one-optimize` section
+        if len(config.sections()) == 1 and config.sections()[0] == 'one-optimize':
+            pass
+        else:
+            raise AssertionError(
+                'Optimization configuration file only allowed to have a \'one-optimize\' section'
+            )
+
+
 def main():
     # parse arguments
     # since the configuration file path is required first,
@@ -121,6 +160,9 @@ def main():
     ]
     _verify_cfg(drivers, config)
 
+    # verify optimization option file
+    _verify_opt(args)
+
     # get sections to run
     section_to_run = []
     for d in drivers:
@@ -132,6 +174,8 @@ def main():
     for section in section_to_run:
         driver_path = os.path.join(dir_path, _get_driver_name(section))
         cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
+        if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
+            cmd += ['-O', getattr(args, 'O')]
         _utils._run(cmd)
 
 
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index 6ce973c..a64abff 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -64,6 +64,9 @@ def _get_parser():
         # opt = (option_name, help_message)
         circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
 
+    # optimization option from one-build
+    parser.add_argument('-O', type=str, help=argparse.SUPPRESS)
+
     return parser
 
 
@@ -113,6 +116,15 @@ def _optimize(args):
         _utils._run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
 
 
+def _parse_opt(args):
+    """Apply the `one-optimize` section of the `-O` file on top of args."""
+    if not _utils._is_valid_attr(args, 'O'):
+        return
+    names = _utils._get_optimization_list(get_name=True)
+    cfg_by_name = dict(zip(names, _utils._get_optimization_list()))
+    _utils._parse_cfg_and_overwrite(cfg_by_name['O' + args.O], 'one-optimize', args)
+
+
 def main():
     # parse arguments
     parser = _get_parser()
@@ -121,6 +133,11 @@ def main():
     # parse configuration file
     _utils._parse_cfg(args, 'one-optimize')
 
+    # parse optimization file
+    # NOTE if there is a `one-optimize` section in above configuration file as well,
+    # it will be overwritten
+    _parse_opt(args)
+
     # verify arguments
     _verify_arg(parser, args)
 
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index cd623a6..22d4ddb 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -39,13 +39,13 @@ def _get_parser():
 
     # input and output path.
     parser.add_argument(
-        '-i', '--input_path', type=str, help='full filepath of the input file')
+        '-i', '--input_path', type=str, help='full filepath of the input circle model')
     parser.add_argument(
         '-d',
         '--input_data',
         type=str,
         help=
-        'full filepath of the input data file. if not specified, run with random input data.'
+        'full filepath of the input data used for post-training quantization. if not specified, run with random input data.'
     )
     parser.add_argument(
         '-f',
@@ -55,7 +55,10 @@ def _get_parser():
         'file format of input data. h5/hdf5 (default), list/filelist (a text file where a file path of input data is written in each line), or dir/directory (a directory where input data are saved)'
     )
     parser.add_argument(
-        '-o', '--output_path', type=str, help='full filepath of the output file')
+        '-o',
+        '--output_path',
+        type=str,
+        help='full filepath of the output quantized model')
 
     # argument for profiling
     parser.add_argument(
@@ -70,41 +73,77 @@ def _get_parser():
     quantization_group.add_argument(
         '--input_dtype',
         type=str,
-        help='input data type (supported: float32, default=float32)')
+        help=
+        'input model data type (supported: float32, default=float32). Deprecated (Use input_model_dtype)'
+    )
+    quantization_group.add_argument(
+        '--input_model_dtype',
+        type=str,
+        help='input model data type (supported: float32, default=float32)')
     quantization_group.add_argument(
         '--quantized_dtype',
         type=str,
-        help='output quantized data type (supported: uint8, int16, default=uint8)')
+        help='data type of output quantized model (supported: uint8, int16, default=uint8)'
+    )
     quantization_group.add_argument(
         '--granularity',
         type=str,
-        help='quantize granularity (supported: layer, channel, default=layer)')
+        help='quantization granularity (supported: layer, channel, default=layer)')
+    quantization_group.add_argument(
+        '--input_type',
+        type=str,
+        help=
+        'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+    )
+    quantization_group.add_argument(
+        '--output_type',
+        type=str,
+        help=
+        'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+    )
     quantization_group.add_argument(
-        '--min_percentile', type=str, help='minimum percentile (0.0~100.0, default=1.0)')
+        '--min_percentile',
+        type=str,
+        help=
+        'minimum percentile (0.0~100.0, default=1.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+    )
     quantization_group.add_argument(
-        '--max_percentile', type=str, help='maximum percentile (0.0~100.0, default=99.0)')
+        '--max_percentile',
+        type=str,
+        help=
+        'maximum percentile (0.0~100.0, default=99.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+    )
     quantization_group.add_argument(
         '--mode',
         type=str,
-        help='record mode (supported: percentile/moving_average, default=percentile)')
+        help=
+        "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
+    )
 
-    # arguments for force_quantparam
-    parser.add_argument(
+    # arguments for force_quantparam option
+    force_quantparam_group = parser.add_argument_group(
+        'arguments for force_quantparam option')
+
+    force_quantparam_group.add_argument(
         '--force_quantparam',
         action='store_true',
-        help='write quantparam to the specified tensor')
-    parser.add_argument(
+        help=
+        'overwrite quantparam (scale, zero_point) to the specified tensor in the quantized model.'
+    )
+    force_quantparam_group.add_argument(
         '--tensor_name', type=str, action='append', help='tensor name (string)')
-    parser.add_argument('--scale', type=float, action='append', help='scale (float)')
-    parser.add_argument(
+    force_quantparam_group.add_argument(
+        '--scale', type=float, action='append', help='scale (float)')
+    force_quantparam_group.add_argument(
         '--zero_point', type=int, action='append', help='zero point (int)')
 
     return parser
 
 
 def _set_default_values(args):
-    if not _utils._is_valid_attr(args, 'input_dtype'):
-        setattr(args, 'input_dtype', 'float32')
+    if not _utils._is_valid_attr(args, 'input_model_dtype') and not _utils._is_valid_attr(
+            args, 'input_dtype'):
+        setattr(args, 'input_model_dtype', 'float32')
     if not _utils._is_valid_attr(args, 'quantized_dtype'):
         setattr(args, 'quantized_dtype', 'uint8')
     if not _utils._is_valid_attr(args, 'granularity'):
@@ -174,7 +213,10 @@ def _quantize(args):
             circle_quantizer_cmd.append('--verbose')
         # quantize_dequantize_weights
         circle_quantizer_cmd.append('--quantize_dequantize_weights')
-        if _utils._is_valid_attr(args, 'input_dtype'):
+        # Use input_model_dtype if it exists. Use input_dtype otherwise.
+        if _utils._is_valid_attr(args, 'input_model_dtype'):
+            circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+        elif _utils._is_valid_attr(args, 'input_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
         if _utils._is_valid_attr(args, 'quantized_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
@@ -243,12 +285,21 @@ def _quantize(args):
             circle_quantizer_cmd.append('--verbose')
         # quantize_dequantize_weights
         circle_quantizer_cmd.append('--quantize_with_minmax')
-        if _utils._is_valid_attr(args, 'input_dtype'):
+        # Use input_model_dtype if it exists. Use input_dtype otherwise.
+        if _utils._is_valid_attr(args, 'input_model_dtype'):
+            circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+        elif _utils._is_valid_attr(args, 'input_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
         if _utils._is_valid_attr(args, 'quantized_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
         if _utils._is_valid_attr(args, 'granularity'):
             circle_quantizer_cmd.append(getattr(args, 'granularity'))
+        if _utils._is_valid_attr(args, 'input_type'):
+            circle_quantizer_cmd.append('--input_type')
+            circle_quantizer_cmd.append(getattr(args, 'input_type'))
+        if _utils._is_valid_attr(args, 'output_type'):
+            circle_quantizer_cmd.append('--output_type')
+            circle_quantizer_cmd.append(getattr(args, 'output_type'))
         # input and output path
         circle_quantizer_cmd.append(tmp_output_path_2)
         if _utils._is_valid_attr(args, 'output_path'):
diff --git a/compiler/one-cmds/tests/OONE-BUILD_014.cfg b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
new file mode 100644
index 0000000..a39aae0
--- /dev/null
+++ b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/one-build_014.cfg b/compiler/one-cmds/tests/one-build_014.cfg
new file mode 100644
index 0000000..f09145e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/one-build_014.test b/compiler/one-cmds/tests/one-build_014.test
new file mode 100644
index 0000000..b3acbf5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.test
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use `OONE-BUILD_014` optimization option
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+    one
+    âââ backends
+    âââ bin
+    âââ doc
+    âââ include
+    âââ lib
+    âââ optimization
+    âââ test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+clean_envir()
+{
+  rm -rf ../optimization/OONE-BUILD_014.cfg
+  if [ "$OPT_ALREADY_EXIST" = false ]; then
+    rm -rf ../optimization
+  fi
+}
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  clean_envir
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_014.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+if [ ! -d "../optimization" ]; then
+  mkdir -p ../optimization
+  OPT_ALREADY_EXIST=false
+fi
+
+cp OONE-BUILD_014.cfg ../optimization
+
+# run test
+LUCI_LOG=5 one-build -C ${configfile} -OONE-BUILD_014 > ${filename}.log 2>&1
+
+clean_envir
+
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-build_neg_007.test b/compiler/one-cmds/tests/one-build_neg_007.test
new file mode 100644
index 0000000..5c5d9af
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_007.test
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+    one
+    âââ backends
+    âââ bin
+    âââ doc
+    âââ include
+    âââ lib
+    âââ optimization
+    âââ test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  rm -rf ../optimization/OONE_BUILD_NEG_007.cfg
+  if [ "$OPT_ALREADY_EXIST" = false ]; then
+    rm -rf ../optimization
+  fi
+  if grep -q "Not found given configuration file" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+if [ ! -d "../optimization" ]; then
+  mkdir -p ../optimization
+  OPT_ALREADY_EXIST=false
+fi
+
+
+touch ../optimization/OONE_BUILD_NEG_007.cfg
+
+configfile=".."
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_008.test b/compiler/one-cmds/tests/one-build_neg_008.test
new file mode 100644
index 0000000..8ed2871
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_008.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid optimization option" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_009.test b/compiler/one-cmds/tests/one-build_neg_009.test
new file mode 100644
index 0000000..8d9c831
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_009.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have space in the optimization name
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Not allowed to have space in the optimization name" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_007.test b/compiler/one-cmds/tests/one-quantize_007.test
new file mode 100644
index 0000000..34ae92d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_007.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.iq8.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+  /bin/bash one-import_001.test > /dev/null 2>&1
+  return_code=$?
+  if [[ ${return_code} != 0 ]]; then
+    trap_err_onexit
+  fi
+fi
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--input_type uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_008.test b/compiler/one-cmds/tests/one-quantize_008.test
new file mode 100644
index 0000000..aff6bcf
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_008.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.oq8.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+  /bin/bash one-import_001.test > /dev/null 2>&1
+  return_code=$?
+  if [[ ${return_code} != 0 ]]; then
+    trap_err_onexit
+  fi
+fi
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--output_type uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test
new file mode 100644
index 0000000..ac920a4
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_019.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input_type
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Unsupported input type" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_019.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--input_type float32 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_023.cfg b/compiler/one-cmds/tests/onecc_023.cfg
new file mode 100644
index 0000000..edbcc6f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_023.q16.iq8.circle
+quantized_dtype=int16
+granularity=channel
+input_type=uint8
diff --git a/compiler/one-cmds/tests/onecc_023.test b/compiler/one-cmds/tests/onecc_023.test
new file mode 100644
index 0000000..50b3b1c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize only (int16, channel-wise) with uint8 input_type
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_023.cfg"
+outputfile="inception_v3.onecc_023.q16.iq8.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index efb01a2..5d84c2b 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -16,6 +16,8 @@
 
 import argparse
 import configparser
+import glob
+import ntpath
 import os
 import subprocess
 import sys
@@ -124,9 +126,34 @@ def _is_valid_attr(args, attr):
     return hasattr(args, attr) and getattr(args, attr)
 
 
+def _parse_cfg_and_overwrite(config_path, section, args):
+    """
+    parse given section of configuration file and set the values of args,
+    overwriting values that already exist in args. `config_path` None is a no-op.
+    Raises FileNotFoundError if the file cannot be read and AssertionError
+    if it has no `section`. Values are set as the strings read from the file.
+    """
+    if config_path is None:
+        # DO NOTHING
+        return
+    config = configparser.ConfigParser()
+    # make option names case sensitive
+    config.optionxform = str
+    parsed = config.read(config_path)
+    if not parsed:
+        raise FileNotFoundError('Not found given configuration file')
+    if not config.has_section(section):
+        raise AssertionError('configuration file doesn\'t have \'' + section +
+                             '\' section')
+    for key, value in config[section].items():
+        setattr(args, key, value)
+    # TODO support accumulated arguments
+
+
 def _parse_cfg(args, driver_name):
     """parse configuration file. If the option is directly given to the command line,
-       the option is processed prior to the configuration file."""
+       the option is processed prior to the configuration file.
+       That is, if the values parsed from the configuration file already exist in args,
+       the values are ignored."""
     if _is_valid_attr(args, 'config'):
         config = configparser.ConfigParser()
         config.optionxform = str
@@ -290,3 +317,54 @@ def _run(cmd, err_prefix=None, logfile=None):
                     logfile.write(line)
     if p.returncode != 0:
         sys.exit(p.returncode)
+
+
+def _remove_prefix(str, prefix):
+    """Return `str` with a leading `prefix` stripped, or `str` itself if absent."""
+    has_prefix = str.startswith(prefix)
+    return str[len(prefix):] if has_prefix else str
+
+
+def _remove_suffix(str, suffix):
+    """Return `str` with a trailing `suffix` stripped; empty `suffix` changes nothing."""
+    # guard the empty suffix: `str[:-0]` is `str[:0]`, which would drop everything
+    return str[:-len(suffix)] if suffix and str.endswith(suffix) else str
+
+
+def _get_optimization_list(get_name=False):
+    """
+    returns a sorted list of optimization. If `get_name` is True,
+    only basename without extension is returned rather than full file path.
+
+    [one hierarchy]
+    one
+    ├── backends
+    ├── bin
+    ├── doc
+    ├── include
+    ├── lib
+    ├── optimization
+    └── test
+
+    Optimization options must be placed in `optimization` folder
+    """
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # optimization folder; sorted so that separate calls return the same order
+    files = sorted(glob.glob(dir_path + '/../optimization/O*.cfg'))
+    # exclude if the file name (not the directory it is installed in) has space
+    files = [s for s in files if ' ' not in ntpath.basename(s)]
+
+    # keep only the candidates that are regular files readable by this process
+    opt_list = []
+    for cand in files:
+        if os.path.isfile(cand) and os.access(cand, os.R_OK):
+            opt_list.append(cand)
+
+    if get_name:
+        # NOTE the name includes prefix 'O'
+        # e.g. O1, O2, ONCHW not just 1, 2, NCHW
+        opt_list = [ntpath.basename(f) for f in opt_list]
+        opt_list = [_remove_suffix(s, '.cfg') for s in opt_list]
+
+    return opt_list
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 7028bd9..ada5ff5 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -207,7 +207,7 @@ struct CookParams
   std::string noname;
 };
 
-template <typename T> void cook_graph(const T &graph, CookParams &cp)
+template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, CookParams &cp)
 {
   LOGGER(l);
 
@@ -537,6 +537,8 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
   subgraph_builder.add_name(name);
 
   subgraph_vec.emplace_back(subgraph_builder.Finish());
+
+  return symbol_table;
 }
 
 } // namespace
@@ -574,6 +576,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
   // Operation-related
   std::vector<flatbuffers::Offset<::tflite::OperatorCode>> code_vec;
 
+  // SignatureDef-related
+  std::vector<flatbuffers::Offset<::tflite::SignatureDef>> signdef_vec;
+
   // Graphs-related
   std::vector<flatbuffers::Offset<::tflite::SubGraph>> subgraph_vec;
 
@@ -617,13 +622,18 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
     buffer_vec.emplace_back(buffer_builder.Finish());
   }
 
+  // symbol_tables stores symbol_table of each sub graph
+  // this is used to find tensor ID(index) with tensor name
+  std::vector<std::map<std::string, int32_t>> symbol_tables;
+
   //
   // Create Main graph
   //
   CookParams cp{buffer_vec,       code_vec,        subgraph_vec, flatbuffer_builder,
                 builtin_code_map, custom_code_vec, "main"};
 
-  cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+  auto table = cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+  symbol_tables.push_back(table);
 
   //
   // Create subgraphs if exist
@@ -638,11 +648,97 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
     CookParams cp{buffer_vec,       code_vec,        subgraph_vec,      flatbuffer_builder,
                   builtin_code_map, custom_code_vec, stringStream.str()};
 
-    cook_graph<::tflchef::Graph>(graph, cp);
+    auto table = cook_graph<::tflchef::Graph>(graph, cp);
+    symbol_tables.push_back(table);
+  }
+
+  // Create Signature-Def
+  //
+  for (int s = 0; s < model_recipe.signature_def_size(); ++s)
+  {
+    // load from recipe
+    const auto &rec_signature_def = model_recipe.signature_def(s);
+
+    std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_inputs;
+    std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_outputs;
+
+    // which subgraph index to cook
+    auto subgraph_index = 0;
+    if (rec_signature_def.has_subgraph_index())
+    {
+      subgraph_index = rec_signature_def.subgraph_index();
+    }
+    assert(subgraph_index < symbol_tables.size());
+    auto &symbol_table = symbol_tables[subgraph_index];
+
+    // cook for inputs
+    for (int si = 0; si < rec_signature_def.inputs_size(); ++si)
+    {
+      // recipe for input TensorMap
+      auto rec_tm_input = rec_signature_def.inputs(si);
+      auto name = flatbuffer_builder->CreateString(rec_tm_input.name());
+      uint32_t tensor_index = 0;
+      // either tensor or tensor_index should exist
+      assert(rec_tm_input.has_tensor() || rec_tm_input.has_tensor_index());
+      if (rec_tm_input.has_tensor())
+      {
+        // we can get tensor_index from symbol_table
+        auto tensor = rec_tm_input.tensor();
+        tensor_index = symbol_table[tensor];
+      }
+      else
+      {
+        // or we can use tensor_index itself
+        tensor_index = rec_tm_input.tensor_index();
+      }
+
+      ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+      tensormap_builder.add_name(name);
+      tensormap_builder.add_tensor_index(tensor_index);
+      tensormap_inputs.push_back(tensormap_builder.Finish());
+    }
+    // cook for outputs, same as inputs
+    for (int so = 0; so < rec_signature_def.outputs_size(); ++so)
+    {
+      auto rec_tm_output = rec_signature_def.outputs(so);
+      auto name = flatbuffer_builder->CreateString(rec_tm_output.name());
+      uint32_t tensor_index = 0;
+      assert(rec_tm_output.has_tensor() || rec_tm_output.has_tensor_index());
+      if (rec_tm_output.has_tensor())
+      {
+        auto tensor = rec_tm_output.tensor();
+        tensor_index = symbol_table[tensor];
+      }
+      else
+      {
+        tensor_index = rec_tm_output.tensor_index();
+      }
+
+      ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+      tensormap_builder.add_name(name);
+      tensormap_builder.add_tensor_index(tensor_index);
+      tensormap_outputs.push_back(tensormap_builder.Finish());
+    }
+
+    auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs);
+    auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs);
+    auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name());
+    auto key = flatbuffer_builder->CreateString(rec_signature_def.key());
+    // TODO add validation for method_name and key
+
+    ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder};
+    signature_def_builder.add_inputs(inputs);
+    signature_def_builder.add_outputs(outputs);
+    signature_def_builder.add_method_name(method_name);
+    signature_def_builder.add_key(key);
+    signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index());
+
+    signdef_vec.emplace_back(signature_def_builder.Finish());
   }
 
   // Create "Model" arguments
   auto buffers = flatbuffer_builder->CreateVector(buffer_vec);
+  auto signdefs = flatbuffer_builder->CreateVector(signdef_vec);
   auto operator_codes = flatbuffer_builder->CreateVector(code_vec);
   auto subgraphs = flatbuffer_builder->CreateVector(subgraph_vec);
   auto description = flatbuffer_builder->CreateString("Generated by tflchef");
@@ -652,6 +748,7 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
 
   model_builder.add_version(3);
   model_builder.add_operator_codes(operator_codes);
+  model_builder.add_signature_defs(signdefs);
   model_builder.add_subgraphs(subgraphs);
   model_builder.add_description(description);
   model_builder.add_buffers(buffers);
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 34d50d9..4162cb1 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -647,6 +647,22 @@ message Operation {
   // use the number not listed in the above reserve list
 }
 
+message TensorMap {
+  optional string name = 4;
+  // Refer to the Operand either by name (tensor) or by its order number (tensor_index).
+  // At least one of the two must be set.
+  optional string tensor = 5;
+  optional uint32 tensor_index = 6;
+}
+
+message SignatureDef {
+  repeated TensorMap inputs = 4;
+  repeated TensorMap outputs = 5;
+  optional string method_name = 6;
+  optional string key = 10;
+  optional uint32 subgraph_index = 12;
+}
+
 // For additional subgraphs
 message Graph {
   repeated Operand operand = 1;
@@ -664,4 +680,5 @@ message ModelRecipe {
   optional string name = 5;
   optional uint32 version = 6 [default = 1];
   repeated Graph graph = 7;
+  repeated SignatureDef signature_def = 8;
 }
diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe
new file mode 100644
index 0000000..4481752
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_index/test.recipe
@@ -0,0 +1,60 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm3"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm1"
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm2"
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm3"
+}
+signature_def {
+  inputs: {
+    name: "ifm"
+    tensor_index: 0
+  }
+  outputs {
+    name: "ofm2"
+    tensor_index: 2
+  }
+  outputs {
+    name: "ofm3"
+    tensor_index: 3
+  }
+  outputs {
+    name: "ofm1"
+    tensor_index: 1
+  }
+  method_name: "serving_default"
+  key: "serv"
+  subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe
new file mode 100644
index 0000000..79be251
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_name/test.recipe
@@ -0,0 +1,60 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm3"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm1"
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm2"
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "ofm3"
+}
+signature_def {
+  inputs: {
+    name: "ifm"
+    tensor_index: 0
+  }
+  outputs {
+    name: "out2"
+    tensor: "ofm2"
+  }
+  outputs {
+    name: "out3"
+    tensor: "ofm3"
+  }
+  outputs {
+    name: "out1"
+    tensor: "ofm1"
+  }
+  method_name: "serving_default"
+  key: "serv"
+  subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 7a480bc..2351e4c 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -405,7 +405,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
     for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
     {
       auto sign_i = signaturedefs->Get(i);
-      os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key("
+      os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key("
          << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
          << std::endl;
 
@@ -413,16 +413,18 @@ void dump_model(std::ostream &os, const tflite::Model *model)
       for (uint32_t t = 0; t < inputs_i->Length(); ++t)
       {
         auto inputs_i_t = inputs_i->Get(t);
-        os << "    I T(" << t << ") " << inputs_i_t->name()->c_str() << ": "
-           << inputs_i_t->tensor_index() << std::endl;
+        os << "    I(" << t << ")"
+           << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+           << inputs_i_t->name()->c_str() << std::endl;
       }
 
       auto outputs_i = sign_i->outputs();
       for (uint32_t t = 0; t < outputs_i->Length(); ++t)
       {
         auto outputs_i_t = outputs_i->Get(t);
-        os << "    O T(" << t << ") " << outputs_i_t->name()->c_str() << ": "
-           << outputs_i_t->tensor_index() << std::endl;
+        os << "    O(" << t << ")"
+           << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+           << outputs_i_t->name()->c_str() << std::endl;
       }
     }
     os << std::endl;
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index 4015631..fb8c211 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -80,7 +80,10 @@ int entry(int argc, char **argv)
   auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024);
 
   // convert tflite to circle
-  tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()};
+  tflite2circle::CircleModel circle_model{flatbuffer_builder};
+
+  circle_model.load_offsets(tfl_model.get_model());
+  circle_model.model_build();
 
   std::ofstream outfile{circle_path, std::ios::binary};
 
diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h
index 14c4f1c..189cfaf 100644
--- a/compiler/tflite2circle/include/CircleModel.h
+++ b/compiler/tflite2circle/include/CircleModel.h
@@ -60,14 +60,17 @@ template <typename T> class Offset
 private:
   using TFLFlatBufVec = flatbuffers::Vector<typename T::TFL>;
   using CIRFlatBufVecOffset = flatbuffers::Offset<flatbuffers::Vector<typename T::CIR>>;
+  using SignatureDefs = flatbuffers::Vector<flatbuffers::Offset<::tflite::SignatureDef>>;
 
 public:
   Offset(void) = delete;
   Offset(FlatBufBuilder &fb) : _fb{fb} {};
 
 public:
-  // TODO use _fb
-  void build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+  void set_signature_defs(const SignatureDefs *offset) { _tfl_signature_def_offsets = offset; }
+
+public:
+  void build(const TFLFlatBufVec *tflite_flatbuffer_vec);
 
 public:
   CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
@@ -75,6 +78,8 @@ public:
 private:
   FlatBufBuilder &_fb;
   CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
+  // TODO revise this when Circle supports SignatureDef
+  const SignatureDefs *_tfl_signature_def_offsets = nullptr;
 };
 
 class CircleModel
@@ -84,9 +89,10 @@ private:
 
 public:
   CircleModel(void) = delete;
-  CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model);
+  CircleModel(FlatBufBuilder &fb);
 
 public:
+  void load_offsets(const tflite::Model *tfl_model);
   void model_build(void) const;
   const char *base(void) const;
   size_t size(void) const;
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index 4249f15..90cc415 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -24,19 +24,16 @@
 namespace tflite2circle
 {
 
-template <>
-void Offset<MetaDataBufferLink>::build(FlatBufBuilder &fb,
-                                       const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<MetaDataBufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   if (tflite_flatbuffer_vec == nullptr)
     return;
   std::vector<int32_t> metadata_buffer_vec{tflite_flatbuffer_vec->begin(),
                                            tflite_flatbuffer_vec->end()};
-  _circle_flatbuffer_vec_offset = fb->CreateVector(metadata_buffer_vec);
+  _circle_flatbuffer_vec_offset = _fb->CreateVector(metadata_buffer_vec);
 }
 
-template <>
-void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<BufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
 
@@ -46,21 +43,22 @@ void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_f
     if (it->data())
     {
       std::vector<uint8_t> data_vec{it->data()->begin(), it->data()->end()};
-      buffer_data = fb->CreateVector(data_vec);
+      buffer_data = _fb->CreateVector(data_vec);
     }
-    circle::BufferBuilder circle_buffer_builder{*fb};
+    circle::BufferBuilder circle_buffer_builder{*_fb};
     circle_buffer_builder.add_data(buffer_data);
     auto circle_buffers = circle_buffer_builder.Finish();
     buffers_vec.emplace_back(circle_buffers);
   }
-  _circle_flatbuffer_vec_offset = fb->CreateVector(buffers_vec);
+  _circle_flatbuffer_vec_offset = _fb->CreateVector(buffers_vec);
 }
 
-template <>
-void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
 
+  int32_t subgraph_index = 0;
+
   for (auto it_sg : *tflite_flatbuffer_vec)
   {
     // tensors of subgraph
@@ -74,12 +72,12 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
       if (it->shape())
       {
         auto shape_vec = std::vector<int32_t>({it->shape()->begin(), it->shape()->end()});
-        shape = fb->CreateVector(shape_vec);
+        shape = _fb->CreateVector(shape_vec);
       }
       // name
       flatbuffers::Offset<flatbuffers::String> name;
       if (it->name())
-        name = fb->CreateString(it->name()->str());
+        name = _fb->CreateString(it->name()->str());
       // quantization
       flatbuffers::Offset<circle::QuantizationParameters> quantization;
       if (it->quantization())
@@ -100,8 +98,8 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
           auto rmax = it->quantization()->max();
           tfmin = std::vector<float>{rmin->begin(), rmin->end()};
           tfmax = std::vector<float>{rmax->begin(), rmax->end()};
-          min = fb->CreateVector(tfmin);
-          max = fb->CreateVector(tfmax);
+          min = _fb->CreateVector(tfmin);
+          max = _fb->CreateVector(tfmax);
         }
 
         if (it->quantization()->scale() && it->quantization()->zero_point())
@@ -110,11 +108,11 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
           auto rz = it->quantization()->zero_point();
           tfscale = std::vector<float>{rs->begin(), rs->end()};
           tfzerop = std::vector<int64_t>{rz->begin(), rz->end()};
-          scale = fb->CreateVector(tfscale);
-          zero_point = fb->CreateVector(tfzerop);
+          scale = _fb->CreateVector(tfscale);
+          zero_point = _fb->CreateVector(tfzerop);
         }
 
-        quantization = circle::CreateQuantizationParameters(*fb, min, max, scale, zero_point,
+        quantization = circle::CreateQuantizationParameters(*_fb, min, max, scale, zero_point,
                                                             circle::QuantizationDetails_NONE, 0,
                                                             quantized_dimension);
       }
@@ -135,7 +133,7 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
         {
           auto traversal_order_vec = std::vector<int32_t>{
             it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
-          traversal_order = fb->CreateVector(traversal_order_vec);
+          traversal_order = _fb->CreateVector(traversal_order_vec);
         }
 
         // block_map
@@ -143,7 +141,7 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
         {
           auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
                                                     it->sparsity()->block_map()->end()};
-          block_map = fb->CreateVector(block_map_vec);
+          block_map = _fb->CreateVector(block_map_vec);
         }
 
         // dim_metadata
@@ -154,18 +152,18 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
           // array_segments
           auto tflite_array_segments_type = it->array_segments_type();
           auto circle_array_segments =
-            get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
+            get_circle_sparse_index_vector(*_fb, it->array_segments(), tflite_array_segments_type);
           auto circle_array_segments_type =
             get_circle_sparse_index_vector_type(tflite_array_segments_type);
 
           // array_indices
           auto tflite_array_indices_type = it->array_indices_type();
           auto circle_array_indices =
-            get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
+            get_circle_sparse_index_vector(*_fb, it->array_indices(), tflite_array_indices_type);
           auto circle_array_indices_type =
             get_circle_sparse_index_vector_type(tflite_array_indices_type);
 
-          auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+          auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*_fb};
 
           circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
           circle_dim_metadata_builder.add_dense_size(it->dense_size());
@@ -176,9 +174,9 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
           auto dim_metadata = circle_dim_metadata_builder.Finish();
           dim_metadata_vec.emplace_back(dim_metadata);
         }
-        dim_metadata = fb->CreateVector(dim_metadata_vec);
+        dim_metadata = _fb->CreateVector(dim_metadata_vec);
 
-        sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+        sparsity = circle::CreateSparsityParameters(*_fb, traversal_order, block_map, dim_metadata);
       }
 
       // shape signature
@@ -187,10 +185,10 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
       {
         auto shape_signature_vec =
           std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
-        shape_signature = fb->CreateVector(shape_signature_vec);
+        shape_signature = _fb->CreateVector(shape_signature_vec);
       }
 
-      circle::TensorBuilder tensor_builder{*fb};
+      circle::TensorBuilder tensor_builder{*_fb};
       tensor_builder.add_shape(shape);
       tensor_builder.add_type(get_circle_tensortype(it->type()));
       tensor_builder.add_buffer(it->buffer());
@@ -202,19 +200,56 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
       auto tensor = tensor_builder.Finish();
       tensor_vec.emplace_back(tensor);
     }
-    auto circle_tensors = fb->CreateVector(tensor_vec);
+    auto circle_tensors = _fb->CreateVector(tensor_vec);
 
     // inputs of subgraph
     auto tflite_inputs = it_sg->inputs();
     std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
 
-    auto circle_inputs = fb->CreateVector(input_vec);
+    // apply signature_def to input tensor index so that input orders are correct
+    // NOTE we do not need this when circle format supports signature_def
+    if (_tfl_signature_def_offsets != nullptr)
+    {
+      for (auto it_signdef : *_tfl_signature_def_offsets)
+      {
+        if (it_signdef->subgraph_index() == subgraph_index)
+        {
+          auto inputs = it_signdef->inputs();
+          assert(inputs->size() == input_vec.size());
+          uint32_t input_vec_idx = 0;
+          for (auto it_tm : *inputs)
+          {
+            input_vec[input_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+          }
+        }
+      }
+    }
+
+    auto circle_inputs = _fb->CreateVector(input_vec);
 
     // outputs of subgraph
     auto tflite_outputs = it_sg->outputs();
     std::vector<int32_t> output_vec{tflite_outputs->begin(), tflite_outputs->end()};
 
-    auto circle_outputs = fb->CreateVector(output_vec);
+    if (_tfl_signature_def_offsets != nullptr)
+    {
+      // apply SignatureDef
+      for (auto it_signdef : *_tfl_signature_def_offsets)
+      {
+        if (it_signdef->subgraph_index() == subgraph_index)
+        {
+          auto outputs = it_signdef->outputs();
+          assert(outputs->size() == output_vec.size());
+          uint32_t output_vec_idx = 0;
+          for (auto it_tm : *outputs)
+          {
+            output_vec[output_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+          }
+        }
+      }
+    }
+
+    auto circle_outputs = _fb->CreateVector(output_vec);
 
     // operators of subgraph
     std::vector<flatbuffers::Offset<circle::Operator>> operator_vec;
@@ -226,12 +261,12 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
       {
         // inputs
         std::vector<int32_t> input_vec{it->inputs()->begin(), it->inputs()->end()};
-        auto circle_inputs = fb->CreateVector(input_vec);
+        auto circle_inputs = _fb->CreateVector(input_vec);
         // outputs
         std::vector<int32_t> output_vec{it->outputs()->begin(), it->outputs()->end()};
-        auto circle_outputs = fb->CreateVector(output_vec);
+        auto circle_outputs = _fb->CreateVector(output_vec);
         // builtin options
-        auto circle_builtin_options = get_circle_builtin_options(*fb, it);
+        auto circle_builtin_options = get_circle_builtin_options(*_fb, it);
         auto circle_builtin_options_type = get_circle_builtin_options_type(it);
         // custom options
         flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
@@ -239,14 +274,14 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
         {
           std::vector<uint8_t> custom_options_vec{it->custom_options()->begin(),
                                                   it->custom_options()->end()};
-          circle_custom_options = fb->CreateVector(custom_options_vec);
+          circle_custom_options = _fb->CreateVector(custom_options_vec);
         }
         // custom options format
         // TODO Make get_circle_custom_options_format
         assert(it->custom_options_format() == tflite::CustomOptionsFormat_FLEXBUFFERS);
         auto circle_custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
 
-        circle::OperatorBuilder operator_builder{*fb};
+        circle::OperatorBuilder operator_builder{*_fb};
         operator_builder.add_opcode_index(it->opcode_index());
         operator_builder.add_inputs(circle_inputs);
         operator_builder.add_outputs(circle_outputs);
@@ -259,13 +294,13 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
         operator_vec.emplace_back(opeartor);
       }
     }
-    auto circle_operators = fb->CreateVector(operator_vec);
+    auto circle_operators = _fb->CreateVector(operator_vec);
 
     // name of subgraph
-    auto subgraphs_name = fb->CreateString(it_sg->name());
+    auto subgraphs_name = _fb->CreateString(it_sg->name());
 
     // subgraphs
-    auto circle_subgraph_builder = circle::SubGraphBuilder{*fb};
+    auto circle_subgraph_builder = circle::SubGraphBuilder{*_fb};
 
     circle_subgraph_builder.add_tensors(circle_tensors);
     circle_subgraph_builder.add_inputs(circle_inputs);
@@ -276,8 +311,11 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
 
     auto circle_subgraph = circle_subgraph_builder.Finish();
     subgprahs_vec.emplace_back(circle_subgraph);
+
+    // next subgraph
+    subgraph_index = subgraph_index + 1;
   }
-  _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
+  _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec);
 }
 
 tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
@@ -291,15 +329,14 @@ tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
   return opcode->builtin_code();
 }
 
-template <>
-void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
 
   for (auto it : *tflite_flatbuffer_vec)
   {
-    auto custom_code = fb->CreateString(it->custom_code());
-    circle::OperatorCodeBuilder operator_code_builder{*fb};
+    auto custom_code = _fb->CreateString(it->custom_code());
+    circle::OperatorCodeBuilder operator_code_builder{*_fb};
     // TODO support circle deprecated_builtin_code
     auto bt_code = builtin_code_neutral(it);
     operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
@@ -308,23 +345,28 @@ void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tf
     auto code = operator_code_builder.Finish();
     operator_code_vec.emplace_back(code);
   }
-  _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
+  _circle_flatbuffer_vec_offset = _fb->CreateVector(operator_code_vec);
 }
 
-CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model)
+CircleModel::CircleModel(FlatBufBuilder &fb)
   : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb}
 {
-  _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb);
-  _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb);
-  _buffers_offset = std::make_unique<Offset<BufferLink>>(fb);
-  _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(fb);
+  // NOTHING TODO
+}
+
+void CircleModel::load_offsets(const tflite::Model *tfl_model)
+{
+  _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(_fb);
+  _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(_fb);
+  _buffers_offset = std::make_unique<Offset<BufferLink>>(_fb);
+  _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(_fb);
 
-  _operator_codes_offset->build(fb, tfl_model->operator_codes());
-  _subGraphs_offset->build(fb, tfl_model->subgraphs());
-  _buffers_offset->build(fb, tfl_model->buffers());
-  _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer());
+  _subGraphs_offset->set_signature_defs(tfl_model->signature_defs());
 
-  model_build();
+  _operator_codes_offset->build(tfl_model->operator_codes());
+  _subGraphs_offset->build(tfl_model->subgraphs());
+  _buffers_offset->build(tfl_model->buffers());
+  _metadata_buffer_offset->build(tfl_model->metadata_buffer());
 }
 
 void CircleModel::model_build(void) const
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 50ee052..2241c9e 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000000120001)
+  set(VCONONE_VERSION 0x0000000000130001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
diff --git a/docs/conf.py b/docs/conf.py
index b59cab8..ff4070f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
 author = 'Samsung Research & contributors'
 
 # The full version, including alpha/beta/rc tags
-release = '1.18.0'
+release = '1.19.0'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/docs/release/1.19/index.rst b/docs/release/1.19/index.rst
new file mode 100644
index 0000000..c80782c
--- /dev/null
+++ b/docs/release/1.19/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+   sphinx-quickstart on Wed Nov 10 15:21:13 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+1.19
+====
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+  ./release-note-1.19.0.md
diff --git a/docs/release/1.19/release-note-1.19.0.md b/docs/release/1.19/release-note-1.19.0.md
new file mode 100644
index 0000000..e63d870
--- /dev/null
+++ b/docs/release/1.19/release-note-1.19.0.md
@@ -0,0 +1,8 @@
+# Release Note 1.19.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `circle-quantizer` supports input/output type option
+- Introduce configuration file for optimization options
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
index b48239f..99118c5 100644
--- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -3,8 +3,10 @@ function(_CMSISSource_import)
   nnas_include(OptionTools)
 
   envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+  set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad)
 
-  ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL})
+  ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}
+          CHECKSUM "SHA256=${CMSIS_5_8_0_SHA256}")
 
   set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE)
   set(CMSISSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
index 0eb8eb9..8b0a602 100644
--- a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
                       INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
                       BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.10-fix4"
-                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      IDENTIFIER  "1.10-fix6"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
                       PKG_NAME    "FLATBUFFERS-1.10")
 
 endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
index daa749c..06366db 100644
--- a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
                       INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
                       BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.12-fix1"
-                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      IDENTIFIER  "1.12-fix3"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
                       PKG_NAME    "FLATBUFFERS-1.12")
 
 endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
index e551e29..8b0a602 100644
--- a/infra/cmake/packages/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
                       INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
                       BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.10-fix4"
-                      EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      IDENTIFIER  "1.10-fix6"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
                       PKG_NAME    "FLATBUFFERS-1.10")
 
 endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
index 8b17430..8055545 100644
--- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -3,8 +3,10 @@ function(_MbedOSSource_import)
   nnas_include(OptionTools)
 
   envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+  set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e)
 
-  ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL})
+  ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}
+          CHECKSUM "SHA256=${MBEDOS_6_15_SHA256}")
 
   set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE)
   set(MbedOSSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
index 12af5f9..2763ac5 100644
--- a/infra/debian/compiler/changelog
+++ b/infra/debian/compiler/changelog
@@ -1,3 +1,10 @@
+one (1.19.0) bionic; urgency=medium
+
+  * `circle-quantizer` supports input/output type option
+  * Introduce configuration file for optimization options
+
+ -- seongwoo <mhs4670go@naver.com>  Wed, 10 Nov 2021 15:53:39 +0900
+
 one (1.18.0) bionic; urgency=medium
 
   * More optimization pass
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
index ee0d3e6..4cf0abc 100644
--- a/infra/debian/runtime/changelog
+++ b/infra/debian/runtime/changelog
@@ -1,3 +1,9 @@
+one (1.19.0) bionic; urgency=low
+
+  * Synch up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com>  Wed, 10 Nov 2021 14:23:00 +0900
+
 one (1.18.0) bionic; urgency=low
 
   * Synch up version with ONE Compiler
diff --git a/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
new file mode 100644
index 0000000..544be03
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
@@ -0,0 +1,66 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+#   - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+#   - TARGET_CPU_FEATURES=nofp;nodsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+        -mcpu=${TARGET_CPU}
+        -mthumb
+        "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+        "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile definitions
+add_compile_definitions(
+        "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+        -mcpu=${TARGET_CPU}
+        -mthumb
+        --specs=nosys.specs)
+
+# Set floating point unit
+if("${TARGET_CPU}" MATCHES "\\+fp")
+    set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+    set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+        "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+    set(FLOAT hard)
+else()
+    set(FLOAT soft)
+endif()
+
+if (FLOAT)
+    add_compile_options(-mfloat-abi=${FLOAT})
+    add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+        -Wno-all
+)
diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
index 4761e84..6ae7dea 100644
--- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
@@ -125,6 +125,13 @@ function(_ARMCompute_Build ARMComputeInstall_DIR)
   list(APPEND SCONS_OPTIONS "Werror=0")
   list(APPEND SCONS_OPTIONS "os=${TARGET_OS}")
 
+  #### Disable test build
+  list(APPEND SCONS_OPTIONS "benchmark_tests=0")
+  list(APPEND SCONS_OPTIONS "validation_tests=0")
+  list(APPEND SCONS_OPTIONS "benchmark_examples=0")
+  list(APPEND SCONS_OPTIONS "validate_examples=0")
+  list(APPEND SCONS_OPTIONS "reference_openmp=0")
+
   if(DEFINED EXTERNALS_BUILD_THREADS)
     set(N ${EXTERNALS_BUILD_THREADS})
   else(DEFINED EXTERNALS_BUILD_THREADS)
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 4133d7a..547d46a 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
 Name:    nnfw
 Summary: nnfw
-Version: 1.18.0
+Version: 1.19.0
 Release: 1
 Group:   Development
 License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
diff --git a/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
new file mode 100644
index 0000000..2cd7b90
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
@@ -0,0 +1,34 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ker"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+  }
+  input: "ifm"
+  input: "ker"
+  input: ""
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
new file mode 100644
index 0000000..ead0c33
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
@@ -0,0 +1,42 @@
+operand {
+  name: "in"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "Tanh"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "weight"
+  type: FLOAT32
+  shape { dim: 4 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "out"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operation {
+  type: "Tanh"
+  input: "in"
+  output: "Tanh"
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+  }
+  input: "Tanh"
+  input: "weight"
+  input: ""
+  output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
new file mode 100644
index 0000000..ae993e6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
@@ -0,0 +1,82 @@
+operand {
+  name: "ifm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ifm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm3"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+  type: "Add"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm1"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Mul"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm2"
+  mul_options {
+    activation: 0
+  }
+}
+operation {
+  type: "Sub"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm3"
+  sub_options {
+    activation: 0
+  }
+}
+signature_def {
+  inputs: {
+    name: "ifm1"
+    tensor_index: 0
+  }
+  inputs: {
+    name: "ifm2"
+    tensor_index: 1
+  }
+  outputs {
+    name: "ofm2"
+    tensor_index: 3
+  }
+  outputs {
+    name: "ofm3"
+    tensor_index: 4
+  }
+  outputs {
+    name: "ofm1"
+    tensor_index: 2
+  }
+  method_name: "serving_default"
+  key: "serv"
+  subgraph_index: 0
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index b432929..9b8840f 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
         minSdkVersion 26
         targetSdkVersion 29
         versionCode 1
-        versionName "1.18.0"
+        versionName "1.19.0"
 
         externalNativeBuild {
             ndkBuild {
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 4fce291..b885a6b 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01001200
+#define NNFW_VERSION 0x01001300
 
 #endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc
index a7185ca..514c014 100644
--- a/runtime/onert/test/core/compiler/HEScheduler.cc
+++ b/runtime/onert/test/core/compiler/HEScheduler.cc
@@ -351,14 +351,19 @@ protected:
   std::string _original_profiling_mode;
 };
 
+//
+// HEScheduler tests
+//
+
 class HESchedulerTestWithExecutorParam : public HESchedulerTest,
                                          public testing::WithParamInterface<std::string>
 {
 };
 
-//
-// HEScheduler tests
-//
+// HESchedulerTestWithExecutorParam tests are parameterized with executor name and run three
+// times - one time for each executor
+INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+                        testing::Values(LINEAR, DATAFLOW, PARALLEL));
 
 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
 TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
@@ -490,11 +495,6 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
   }
 }
 
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
-                        testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
 // Test scheduler behavior for branched graph and enabled profiling mode
 TEST_F(HESchedulerTest, branched_graph_profiling_mode)
 {
-- 
2.7.4